before you start: .rs.restartR()
GitClonePath <- "/Users/admin/Desktop/ElbeEstuarineZander"
setwd(GitClonePath)
# R package management tools such as packrat or renv. These tools allow you to create isolated environments with specific versions of R and installed packages, ensuring reproducibility and avoiding conflicts with other packages installed in your system.
#install.packages("packrat")
#packrat::init()
#packrat::restore()
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install(c(
# "DESeq2",
# "AnnotationDbi",
# "AnnotationHub",
# "Biobase",
# "GSEABase",
# "DECIPHER",
# "ComplexHeatmap",
# "Biostrings",
# "BiocParallel",
# "BiocNeighbors",
# "BiocFileCache",
# "BiocBaseUtils",
# "GO.db",
# "GOSemSim",
# "ShortRead",
# "SummarizedExperiment",
# "TreeSummarizedExperiment",
# "phyloseq",
# "tidyverse",
# "plyr",
# "dplyr",
# "DESeq2",
# "cowplot",
# "ggrepel",
# "PCAtools",
# "ComplexHeatmap",
# "WGCNA",
# "dada2",
# "ShortRead",
# "phyloseq",
# "factoextra",
# "microbiome", #clr transformation
# "mia",
# "clusterProfiler",
# "GenomeInfoDbData"
# ), force =T)
#New in this Part:
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install(c(
# "sva",
# "fgsea",
# "org.Hs.eg.db"
# ), force=T)
Most of the packages needed here are stored in the packrat::restore
Date <- "16.01.2024"
Species <- "SL"
Analysis <- "WGCNA"
################
#Bacteria Input#
pslist <- readRDS(file.path(path_Output_16S,paste(paste( "SL_2021_Summer_16S_merge_Filter_ASV_besttax", Date, sep="_"), ".rds", sep="")))
pslistraw <- readRDS(file.path(path_Output_16S,paste(paste("SL_2021_Summer_ps_16S_merge_pslistraw", Date, sep="_"), ".rds", sep="")))
##################################################
#AnnotationFile Created in Sections 1.1.3 - 1.1.5#
SLUCGeneManual <- read.csv2(file.path(path_Input_RNA, "SLUCGene-Manual-17.06.2023.csv"))[-1]
#############
#Sample File#
SAMDF<- read.table(file=file.path(path_Input_RNA, "SL_samplefile_04.01.2024.csv") ,sep=";",dec = ".")
#####################
#Count and TPM files#
cnt_RNA_Gill <- readRDS(file.path(path_Output_RNA, "SL_Gill_featurecounts_countsNoRibo_08.01.24.rds"))
cnt_RNA_Liver <- readRDS(file.path(path_Output_RNA, "SL_Liver_featurecounts_countsNoRibo_08.01.24.rds"))
tpm_RNA_Liver <- readRDS(file.path(path_Output_RNA, "SL_Liver_featurecounts_tpm_NoRibo_08.01.24.rds"))
tpm_RNA_Gill <- readRDS(file.path(path_Output_RNA, "SL_Gill_featurecounts_tpm_NoRibo_08.01.24.rds"))
tpms <- list("tpmGill "= tpm_RNA_Gill, "tpmLiver" = tpm_RNA_Liver)
################################################
#Assign the SL_RNA Output to Global Environment#
save_name_RNA_Deseq2 <- "SL_2021_Summer" #from SL_RNA_16.01.24_Paper.Rmd
DeseqLFC005 <- readRDS(file = paste0(file.path(path_Output_RNA, paste(paste(paste(save_name_RNA_Deseq2, "DEG_Replicates_Outlier_pairwise", sep="_"), paste("LFC", "0.05", sep=""), Date, sep="_"), ".rds", sep=""))))
DeseqLFC1 <- readRDS(file = paste0(file.path(path_Output_RNA, paste(paste(paste(save_name_RNA_Deseq2, "DEG_Replicates_Outlier_pairwise", sep="_"), paste("LFC", "1", sep=""), Date, sep="_"), ".rds", sep=""))))
traitData<- c(
"FultonK", "Length","Weight",
"StomachContent",
"HSI", "SSI",
"O2","Salinity","SecchiDepth")
#Sample Files
library(readxl)
library(tidyverse)
library(ggplot2)
SAMDF_16S<-dplyr::filter(SAMDF, DNA16S == "yes")
rownames(SAMDF_16S) <- SAMDF_16S$SampleID
# Get all IDs present
SAMDF_RNA_Gill<-dplyr::filter(SAMDF, RNAseq == "yes")
rownames(SAMDF_RNA_Gill) <- SAMDF_RNA_Gill$SampleID
SAMDF_RNA_Liver<-dplyr::filter(SAMDF, RNAseq == "yes")
rownames(SAMDF_RNA_Liver) <- SAMDF_RNA_Liver$SampleID
SAMDF_RNA_Consensus<-dplyr::filter(SAMDF, RNAseq == "yes")
rownames(SAMDF_RNA_Consensus) <- SAMDF_RNA_Consensus$SampleID
OutlineColor <- "grey20" #"white"
#-
set.seed(123)
Species <- "SL"
Tissue <- "Gill"
Year <- "2021"
Season <- "Summer"
Type <- "16S"
Analysis <- "WGCNA"
alpha <- 0.05
OperatingSystem <- "Windows"
prefix <- "SSU-"
save_name_16S <- paste(Species, Year, Season, Tissue, Type, Analysis, sep = "_")
paste0(file.path(path_Output_WGCNA), save_name_16S, "_.RData")
## [1] "/Users/admin/Desktop/ElbeEstuarineZander/SL_Output_WGCNASL_2021_Summer_Gill_16S_WGCNA_.RData"
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
#Read in the female liver data set
set.seed(123)
Species <- "SL"
Year <- "2021"
Season <- "Summer"
Type <- "RNA"
Tissue <- "Gill"
Analysis <- "WGCNA"
alpha <- 0.05
OperatingSystem <- "Windows"
prefix <- "RNAseq-"
#####################
Tissue <- "Gill"
variable <- "Replicates"
#####################
save_name_RNA_Gill <- paste(Species, Year,Season, Tissue, Type, Analysis, sep = "_")
paste0(file.path(path_Output_WGCNA, paste(Analysis, "_", sep="")), save_name_RNA_Gill, ".RData")
## [1] "/Users/admin/Desktop/ElbeEstuarineZander/SL_Output_WGCNA/WGCNA_SL_2021_Summer_Gill_RNA_WGCNA.RData"
##################
#Sample selection#
##################
cat(paste(shQuote(SAMDF_RNA_Gill$SampleID, type="cmd"), collapse=", "))
## "SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7", "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB7", "SLSU21BBEB9", "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9", "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9"
# "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
Samples_RNA_Gill_All<-c(
"SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7",
"SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
"SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9",
"SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
Samples_RNA_Gill<-c(
"SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB4", "SLSU21MGEB7",
"SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
"SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB7", "SLSU21SSEB9",
"SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
#"SLSU21MGEB5" outlier in sample Clustering
#"SLSU21SSEB6", doubble outlier from the group in Liver & Gill
#"SLSU21MGEB3", doubble outlier from the group in Liver & Gill
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
#Read in the female liver data set
set.seed(123)
Species <- "SL"
Year <- "2021"
Season <- "Summer"
Type <- "RNA"
Tissue <- "Liver"
Analysis <- "WGCNA"
alpha <- 0.05
OperatingSystem <- "Windows"
prefix <- "RNAseq-"
#####################
Tissue <- "Liver"
variable <- "Replicates"
#####################
save_name_RNA_Liver <- paste(Species, Year,Season, Tissue, Type, Analysis, sep = "_")
paste0(file.path(path_Output_WGCNA, paste(Analysis, "_", sep="")), save_name_RNA_Liver, ".RData")
## [1] "/Users/admin/Desktop/ElbeEstuarineZander/SL_Output_WGCNA/WGCNA_SL_2021_Summer_Liver_RNA_WGCNA.RData"
##################
#Sample selection#
##################
cat(paste(shQuote(SAMDF_RNA_Liver$SampleID, type="cmd"), collapse=", "))
## "SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7", "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB7", "SLSU21BBEB9", "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9", "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9"
# "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
Samples_RNA_Liver_All<-c(
"SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7",
"SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
"SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9",
"SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
Samples_RNA_Liver<-c(
"SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7",
"SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
"SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB7", "SLSU21SSEB9",
"SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
#"SLSU21SSEB6", doubble outlier from the group in Liver & Gill
#"SLSU21MGEB3", doubble outlier from the group in Liver & Gill
#-
library(WGCNA)
library(tidyverse)
library(DESeq2)
options(stringsAsFactors = FALSE);
###########
#Load data#
###########
SSUWGCNAlist <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "SSU_List_2", Date, sep="_"), ".rds", sep=""))))
SSU_Gill_WGCNA <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "SSU_List", Date, sep="_"), ".rds", sep=""))))
RNA_Gill_WGCNA <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "List_3", Date, sep="_"), ".rds", sep=""))))
RNA_Liver_WGCNA <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "List_3", Date, sep="_"), ".rds", sep=""))))
WGCNA_list <- list("SSU_Gill_WGCNA" =SSU_Gill_WGCNA, "RNA_Gill_WGCNA" = RNA_Gill_WGCNA, "RNA_Liver_WGCNA" =RNA_Liver_WGCNA)
WGCNA_list$SSU_Gill_WGCNA$SSU_Tax <- SSUWGCNAlist
###########
#Save Data#
###########
saveRDS(WGCNA_list, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, Date, sep="_"), ".rds", sep=""))))
WGCNA_list <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, Date, sep="_"), ".rds", sep=""))))
##################
#Prepare Datasets#
##################
######
#Gill#
######
require(WGCNA); require(tidyverse)
RNA_MEs_Gill <- RNA_Gill_WGCNA$RNA_Gill_MEs[rownames(RNA_Gill_WGCNA$RNA_Gill_MEs) %in%
intersect(rownames(SSU_Gill_WGCNA$SSU_Gill_MEs), rownames(SSU_Gill_WGCNA$SSU_Gill_MEs)),]
RNA_MEs_Gill <- RNA_MEs_Gill %>% dplyr::arrange(factor(rownames(RNA_MEs_Gill), levels = Samples_RNA_Gill))
RNA_MEs_Gill <- orderMEs(RNA_MEs_Gill)
names(RNA_MEs_Gill) <- paste("RNA",sub("ME", "", names(RNA_MEs_Gill)), sep="")
SSU_MEs_Gill <- SSU_Gill_WGCNA$SSU_Gill_MEs[rownames(SSU_Gill_WGCNA$SSU_Gill_MEs) %in%
intersect(rownames(SSU_Gill_WGCNA$SSU_Gill_MEs), rownames(RNA_Gill_WGCNA$RNA_Gill_MEs)),]
SSU_MEs_Gill <-SSU_MEs_Gill %>% dplyr::arrange(factor(rownames(SSU_MEs_Gill), levels = Samples_RNA_Liver))
names(SSU_MEs_Gill) <- paste("SSU",sub("ME", "", names(SSU_MEs_Gill)), sep="")
SSUOrder <- c("SSU0","SSU1","SSU2","SSU3","SSU4","SSU5","SSU6","SSU7","SSU8", "SSU9","SSU10","SSU11", "SSU12","SSU13","SSU14")
SSU_MEs_Gill <- SSU_MEs_Gill[, order(match(names(SSU_MEs_Gill), SSUOrder))]
SAM_MEs_Gill <- SAMDF_RNA_Gill[rownames(SAMDF_RNA_Gill) %in% rownames(RNA_MEs_Gill),]
SAM_MEs_Gill <- SAM_MEs_Gill %>% dplyr::arrange(factor(rownames(SAM_MEs_Gill), levels = Samples_RNA_Gill))
all(rownames(RNA_MEs_Gill) == rownames(SSU_MEs_Gill) & rownames(RNA_MEs_Gill) == rownames(SAM_MEs_Gill))
## [1] TRUE
#######
#Liver#
#######
RNA_MEs_Liver <- RNA_Liver_WGCNA$RNA_Liver_MEs[rownames(RNA_Liver_WGCNA$RNA_Liver_MEs) %in%
intersect(rownames(SSU_Gill_WGCNA$SSU_Gill_MEs), rownames(SSU_Gill_WGCNA$SSU_Gill_MEs)),]
RNA_MEs_Liver <- RNA_MEs_Liver %>% dplyr::arrange(factor(rownames(RNA_MEs_Liver), levels = Samples_RNA_Liver))
RNA_MEs_Liver = orderMEs(RNA_MEs_Liver)
names(RNA_MEs_Liver) <- paste("RNA",sub("ME", "", names(RNA_MEs_Liver)), sep="")
SSU_MEs_Liver <- SSU_Gill_WGCNA$SSU_Gill_MEs[rownames(SSU_Gill_WGCNA$SSU_Gill_MEs) %in%
intersect(rownames(SSU_Gill_WGCNA$SSU_Gill_MEs), rownames(RNA_Liver_WGCNA$RNA_Liver_MEs)),]
SSU_MEs_Liver <-SSU_MEs_Liver%>% dplyr::arrange(factor(rownames(SSU_MEs_Liver), levels = Samples_RNA_Liver))
names(SSU_MEs_Liver) <- paste("SSU",sub("ME", "", names(SSU_MEs_Liver)), sep="")
SSUOrder <- c("SSU0","SSU1","SSU2","SSU3","SSU4","SSU5","SSU6","SSU7","SSU8", "SSU9","SSU10","SSU11", "SSU12","SSU13","SSU14")
SSU_MEs_Liver <- SSU_MEs_Liver[, order(match(names(SSU_MEs_Liver), SSUOrder))]
SAM_MEs_Liver <- SAMDF_RNA_Liver[rownames(SAMDF_RNA_Liver) %in% rownames(RNA_MEs_Liver),]
SAM_MEs_Liver <- SAM_MEs_Liver %>% dplyr::arrange(factor(rownames(SAM_MEs_Liver), levels = Samples_RNA_Liver))
all(rownames(RNA_MEs_Liver) == rownames(SSU_MEs_Liver) & rownames(RNA_MEs_Liver) == rownames(SAM_MEs_Liver))
## [1] TRUE
Gill_ME <- list("RNA_MEs_Gill" = RNA_MEs_Gill, "SSU_MEs_Gill" = SSU_MEs_Gill, "SAM_MEs_Gill" = SAM_MEs_Gill)
Liver_ME <- list("RNA_MEs_Liver" = RNA_MEs_Liver, "SSU_MEs_Liver" = SSU_MEs_Liver, "SAM_MEs_Liver" =SAM_MEs_Liver)
HeatmapDataset <- list("Gill_ME" = Gill_ME, "Liver_ME" = Liver_ME)
traitData<- c(
"FultonK", "Length","Weight",
"StomachContent",
"HSI", "SSI",
"O2","Salinity","SecchiDepth")
ModsOfInterst_list <- list()
for (WGCNAset in names(WGCNA_list)) {
##############
#Create Setup#
##############
ModsOfInterst_list_length <- length(ModsOfInterst_list)
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
omics_data <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("omics_data", names(WGCNA_list[[WGCNAset]]))][[1]])
MEs <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("MEs", names(WGCNA_list[[WGCNAset]]))][[1]])
SAM <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("datTraits", names(WGCNA_list[[WGCNAset]]))][[1]])
if (Type != "SSU") {
ANNO <- WGCNA_list[[WGCNAset]][grepl("GeneAnno", names(WGCNA_list[[WGCNAset]]))][[1]]
}
###############################################################################
#Create Dataframes of significant correlation between RNAModules and TraitData#
###############################################################################
SAM_traits <- SAM[names(SAM) %in% traitData]
SAM_traits <- SAM_traits[, traitData]
p.value_matr <- corr.value_matr <- matrix(ncol = ncol(SAM_traits),
nrow = ncol(MEs),
dimnames = list(colnames(MEs),
colnames(SAM_traits)))
for(ii in 1:ncol(MEs)){
for(j in 1:ncol(SAM_traits)){
cor.res <- cor.test(MEs[,ii], SAM_traits[,j])
p.value_matr[ii, j] <- cor.res$p.value
corr.value_matr[ii, j] <- cor.res$estimate
}
}
# Correct for number of tests
p.value_matr.adjust <- p.adjust(p.value_matr, method = "fdr")
dim(p.value_matr.adjust) <- dim(p.value_matr)
dimnames(p.value_matr.adjust) <- list(colnames(MEs), colnames(SAM_traits))
# Collect all results into a list.
Traits_corr <- list(p_value =as.data.frame(p.value_matr),
p_value_adj = as.data.frame(p.value_matr.adjust),
correlation = as.data.frame(corr.value_matr))
ModsOfInterst <- list()
for (iii in names(Traits_corr$correlation)){
aa <- length(ModsOfInterst)
if (iii %in% c("O2", "SecchiDepth")) {
A <- Traits_corr$correlation[iii][Traits_corr$correlation[iii] < 0, , drop=FALSE]
B <- Traits_corr$p_value[iii][Traits_corr$p_value[iii] < 0.05, , drop = FALSE]
BB <- B[rownames(B) %in% rownames(A), , drop = FALSE]
BBB <- cbind(A[rownames(A) %in% rownames(BB), , drop = FALSE], BB)
names(BBB) <- c("correlation", "p_value")
}
else {
BB <- Traits_corr$p_value[iii][Traits_corr$p_value[iii] < 0.05, , drop = FALSE]
AA <- Traits_corr$correlation[iii][rownames(Traits_corr$correlation[iii]) %in%
rownames(BB), , drop=FALSE]
BBB <- cbind(AA, BB)
names(BBB) <- c("correlation", "p_value")
}
ModsOfInterst[[aa+1]] <- BBB
names(ModsOfInterst)[[aa+1]] <- iii
}
ModsOfInterst<- Filter(function(df) nrow(df) > 0, ModsOfInterst)
ModsOfInterst_list[[ModsOfInterst_list_length+1]] <- ModsOfInterst
names(ModsOfInterst_list)[[ModsOfInterst_list_length+1]] <- paste("ModsOfInterest", Tissue, Type, sep="_")
}
###########
#Save Data#
###########
saveRDS(ModsOfInterst_list, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ModsOfInterst_list", Date, sep="_"), ".rds", sep=""))))
ModsOfInterst_list <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ModsOfInterst_list", Date, sep="_"), ".rds", sep=""))))
##########################
#IntegratedHeatmapDataset# #Full and Subsetted for significant correlation
##########################
traitData<- c(
"FultonK", "Length","Weight",
"StomachContent",
"HSI", "SSI",
"O2","Salinity","SecchiDepth")
hmps_list <- list()
for (i in names(HeatmapDataset)[grepl("ME", names(HeatmapDataset))]) {
g <- i
gg <- sub("_ME", "", g) #print(gg)
SAM_MEs <- HeatmapDataset[[i]][[paste("SAM_MEs", gg, sep="_")]]
SSU_MEs <- HeatmapDataset[[i]][[paste("SSU_MEs", gg, sep="_")]]
RNA_MEs <- HeatmapDataset[[i]][[paste("RNA_MEs", gg, sep="_")]]
hmps_length <- length(hmps_list)
FILENAME <- paste(paste(Species, Year, Season, Analysis, "IntegratedHeatmap", gg,Date, sep="_"),".png", sep="")
WGCNA_IntegratedHeatmap_RK(
RNA_MEs = RNA_MEs,
SAM_MEs = SAM_MEs,
SSU_MEs = SSU_MEs,
WIDTH = 14,
HEIGHT = 10,
OutlineColor = "grey20",
traitData = traitData)
hmps_list[[hmps_length+1]] <- ht_list
names(hmps_list)[[hmps_length+1]] <- paste("ht_list", gg, sep="_")
###############################################################################
#Create Dataframes of significant correlation between RNAModules and TraitData#
###############################################################################
SAM_MEs_traits <- SAM_MEs[names(SAM_MEs) %in% traitData]
SAM_MEs_traits <- SAM_MEs_traits[, traitData]
p.value_matr <- corr.value_matr <- matrix(ncol = ncol(SAM_MEs_traits),
nrow = ncol(RNA_MEs),
dimnames = list(colnames(RNA_MEs),
colnames(SAM_MEs_traits)))
for(ii in 1:ncol(RNA_MEs)){
for(j in 1:ncol(SAM_MEs_traits)){
if(colnames(SAM_MEs_traits)[j] == "Day"){
cor.res <- cor.test(RNA_MEs[,ii], SAM_MEs_traits[,j], method = "spearman", exact = FALSE)
p.value_matr[ii, j] <- cor.res$p.value
corr.value_matr[ii, j] <- cor.res$estimate
} else{
cor.res <- cor.test(RNA_MEs[,ii], SAM_MEs_traits[,j])
p.value_matr[ii, j] <- cor.res$p.value
corr.value_matr[ii, j] <- cor.res$estimate
}}}
# Correct for number of tests
p.value_matr.adjust <- p.adjust(p.value_matr, method = "fdr")
dim(p.value_matr.adjust) <- dim(p.value_matr)
dimnames(p.value_matr.adjust) <- list(colnames(RNA_MEs), colnames(SAM_MEs_traits))
# Collect all results into a list.
Traits_corr_RNA <- list(p_value =as.data.frame(p.value_matr),
p_value_adj = as.data.frame(p.value_matr.adjust),
correlation = as.data.frame(corr.value_matr))
ModsOfInterst <- list()
for (iii in names(Traits_corr_RNA$correlation)){
aa <- length(ModsOfInterst)
if (iii %in% c("O2", "SecchiDepth")) {
A <- Traits_corr_RNA$correlation[iii][Traits_corr_RNA$correlation[iii] < 0, , drop=FALSE]
B <- Traits_corr_RNA$p_value[iii][Traits_corr_RNA$p_value[iii] < 0.05, , drop = FALSE]
BB <- B[rownames(B) %in% rownames(A), , drop = FALSE]
BBB <- cbind(A[rownames(A) %in% rownames(BB), , drop = FALSE], BB)
}
else {
BB <- Traits_corr_RNA$p_value[iii][Traits_corr_RNA$p_value[iii] < 0.05, , drop = FALSE]
AA <- Traits_corr_RNA$correlation[iii][rownames(Traits_corr_RNA$correlation[iii]) %in%
rownames(BB), , drop=FALSE]
BBB <- cbind(AA, BB)
}
ModsOfInterst[[aa+1]] <- BBB
names(ModsOfInterst)[[aa+1]] <- iii
}
ModsOfInterst<- Filter(function(df) nrow(df) > 0, ModsOfInterst)
#print(paste("O2 Correlation", rownames(ModsOfInterst$O2)))
cleaned_ModsOfInterst <- list()
for (NonZero in names(ModsOfInterst)) {
if ("RNA0" %in% rownames(ModsOfInterst[[NonZero]])) {
cleaned_ModsOfInterst[[NonZero]] <-
ModsOfInterst[[NonZero]][-which(rownames(ModsOfInterst[[NonZero]]) == "RNA0"), , drop= FALSE]
} else {
cleaned_ModsOfInterst[[NonZero]] <- ModsOfInterst[[NonZero]]
}
}
###########################################
#Kick out negative correlation to Salinity#
cleaned_ModsOfInterst$Salinity <-
cleaned_ModsOfInterst$Salinity[cleaned_ModsOfInterst$Salinity[1] > 0, , drop = FALSE]
#######################################
#Create Integrated Heatmaps of Subsets#
#######################################
for (iiii in names(cleaned_ModsOfInterst)) {
hmps_length <- length(hmps_list)
FILENAME <- paste(paste(Species, Year, Season, Analysis, "IntegratedHeatmap", gg, iiii, Date, sep="_"),".png", sep="")
RNA_MEs_sub <- RNA_MEs[,names(RNA_MEs) %in% rownames(cleaned_ModsOfInterst[[iiii]])]
HEIGHT <- 2+0.2*length(rownames(cleaned_ModsOfInterst[[iiii]]))
WGCNA_IntegratedHeatmap_RK(
RNA_MEs = RNA_MEs_sub,
SAM_MEs = SAM_MEs,
SSU_MEs = SSU_MEs,
WIDTH = 14,
HEIGHT = HEIGHT,
OutlineColor = "grey20",
traitData = traitData)
hmps_list[[hmps_length+1]] <- ht_list
names(hmps_list)[[hmps_length+1]] <- paste("ht_list", gg, iiii, sep="_")
}
}
#Gill
hmps_list$ht_list_Gill
#Liver
hmps_list$ht_list_Liver
#############################################
#Combined Liver & Gill For specific Abiotics#
#############################################
# prow <- cowplot::plot_grid(grid.grabExpr(ComplexHeatmap::draw(hmps_list$ht_list_Gill_O2, auto_adjust = FALSE, background = "transparent", heatmap_legend_side = "left", annotation_legend_side = "bottom")), grid.grabExpr(ComplexHeatmap::draw(hmps_list$ht_list_Liver_O2, auto_adjust = FALSE, background = "transparent", heatmap_legend_side = "left", annotation_legend_side = "bottom")),
# ncol = 1)
# ggsave(prow, filename = paste(paste(save_name_RNA_Gill, "SL_Oxy_WGCNA_Integrated_Heatmap", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 14,
# height = 9)
# prow
#-
Lets check our understanding of these measurements: 1) kTotal - connectivity of the each gene based on its r-values to all other genes in the whole network 2) kWithin - connectivity of the each gene within a single module based on its r-values to all other genes within the same module 3) and 4) kOut and kDiff mathematical derivatives from 1) and 2) So, let say WGCNA identified 10 modules, but kWithin for “Module 2” is the largest and obviously larger than kTotal. This suggest “Module 2” to be a core of the network, or more important. Let say You perform functional annotation of the genes in this module and will find them to be enriched in amino acid analysis. And Your study was dedicated to metabolic changes under some conditions. So You can make conclusion that Your condition strongly affects AA metabolism, and via it - whole network. In contrast, high kOut can suggest that total connectivity is much larger that connectivity within modules, and I would say - reflects sort of network’s stability under tested conditions. So a set of vertices with high kOut can be targets for annotation as hubs that determined this stability. And elimination of them (knockout mutations, for example) can break the whole network.
st_Gill_RNA <- 6
st_Liver_RNA <- 3
st_Gill_SSU <- 6
traitData<- c(
"FultonK", "Length","Weight",
"HSI","SSI",
"O2","Salinity","SecchiDepth")
#####################
#Create Degrees_list#
#####################
degrees_list <- list()
for (WGCNAset in names(WGCNA_list)) {
require(WGCNA)
a <- length(degrees_list)
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
omics_data <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("omics_data", names(WGCNA_list[[WGCNAset]]))][[1]])
MEs <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("MEs", names(WGCNA_list[[WGCNAset]]))][[1]])
SAM <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("datTraits", names(WGCNA_list[[WGCNAset]]))][[1]])
ANNO <- WGCNA_list[[WGCNAset]][grepl("GeneAnno", names(WGCNA_list[[WGCNAset]]))]
moduleColors <- WGCNA_list[[WGCNAset]][grepl("moduleColors", names(WGCNA_list[[WGCNAset]]))][[1]]
moduleLabels <- WGCNA_list[[WGCNAset]][grepl("moduleLabels", names(WGCNA_list[[WGCNAset]]))][[1]]
module_size<- as.data.frame(table(moduleLabels))
colnames(module_size) <- c("Module", "Size")
module_size %>% dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
if (Tissue == "Liver" && Type == "RNA") {st <- st_Liver_RNA
} else if (Tissue == "Gill" && Type == "RNA") {st <- st_Gill_RNA
} else if (Tissue == "Gill" && Type == "SSU") {st <- st_Gill_SSU
} else {st <- NA }
#print(st)
degrees <- intramodularConnectivity.fromExpr(as.data.frame(omics_data), colors = moduleColors, power = st,
networkType = "signed", distFnc = "bicor")
degrees_list[[a+1]] <- degrees
names(degrees_list)[[a+1]] <- paste("degrees", Tissue, Type, sep="_")
}
## softConnectivity: FYI: connecitivty of genes with less than 8 valid samples will be returned as NA.
## ..calculating connectivities..
## softConnectivity: FYI: connecitivty of genes with less than 7 valid samples will be returned as NA.
## ..calculating connectivities..
## softConnectivity: FYI: connecitivty of genes with less than 8 valid samples will be returned as NA.
## ..calculating connectivities..
# A correlation coefficient of . 10 is thought to represent a weak or small association; a correlation coefficient of . 30 is considered a moderate correlation; and a correlation coefficient of . 50 or larger is thought to represent a strong or large correlation.
###########
#Save Data#
###########
saveRDS(degrees_list, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "degrees_list", Date, sep="_"), ".rds", sep=""))))
degrees_list <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "degrees_list", Date, sep="_"), ".rds", sep=""))))
################################################
#Scatter GS vs ModuleMembership over all Traits#
################################################
scatter_list <- list()
for (WGCNAset in names(WGCNA_list)) {
##############
#Create Setup#
##############
tryCatch({
a <- length(scatter_list)
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
omics_data <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("omics_data", names(WGCNA_list[[WGCNAset]]))][[1]])
MEs <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("MEs", names(WGCNA_list[[WGCNAset]]))][[1]])
SAM <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("datTraits", names(WGCNA_list[[WGCNAset]]))][[1]])
if (Type != "SSU") {
ANNO <- WGCNA_list[[WGCNAset]][grepl("GeneAnno", names(WGCNA_list[[WGCNAset]]))][[1]]
}
moduleColors <- WGCNA_list[[WGCNAset]][grepl("moduleColors", names(WGCNA_list[[WGCNAset]]))][[1]]
moduleLabels <- WGCNA_list[[WGCNAset]][grepl("moduleLabels", names(WGCNA_list[[WGCNAset]]))][[1]]
module_size<- as.data.frame(table(moduleLabels))
colnames(module_size) <- c("Module", "Size")
module_size %>% dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
degrees <- degrees_list[[paste("degrees", Tissue, Type, sep="_")]]
###############################################################################
#Create Dataframes of significant correlation between RNAModules and TraitData#
###############################################################################
SAM_traits <- SAM[names(SAM) %in% traitData]
SAM_traits <- SAM_traits[, traitData]
p.value_matr <- corr.value_matr <- matrix(ncol = ncol(SAM_traits),
nrow = ncol(MEs),
dimnames = list(colnames(MEs),
colnames(SAM_traits)))
for(ii in 1:ncol(MEs)){
for(j in 1:ncol(SAM_traits)){
cor.res <- cor.test(MEs[,ii], SAM_traits[,j])
p.value_matr[ii, j] <- cor.res$p.value
corr.value_matr[ii, j] <- cor.res$estimate
}
}
# Correct for number of tests
p.value_matr.adjust <- p.adjust(p.value_matr, method = "fdr")
dim(p.value_matr.adjust) <- dim(p.value_matr)
dimnames(p.value_matr.adjust) <- list(colnames(MEs), colnames(SAM_traits))
# Collect all results into a list.
Traits_corr <- list(p_value =as.data.frame(p.value_matr),
p_value_adj = as.data.frame(p.value_matr.adjust),
correlation = as.data.frame(corr.value_matr))
ModsOfInterst <- list()
for (iii in names(Traits_corr$correlation)){
aa <- length(ModsOfInterst)
if (iii %in% c("O2", "SecchiDepth")) {
A <- Traits_corr$correlation[iii][Traits_corr$correlation[iii] < 0, , drop=FALSE]
B <- Traits_corr$p_value[iii][Traits_corr$p_value[iii] < 0.05, , drop = FALSE]
BB <- B[rownames(B) %in% rownames(A), , drop = FALSE]
}
else {
BB <- Traits_corr$p_value[iii][Traits_corr$p_value[iii] < 0.05, , drop = FALSE]
}
ModsOfInterst[[aa+1]] <- BB
names(ModsOfInterst)[[aa+1]] <- iii
}
ModsOfInterst<- Filter(function(df) nrow(df) > 0, ModsOfInterst)
#########################################
#Loop Trough all the traits in traitData#
#########################################
TraitOfInterest_list <- list()
for (TraitOfInterest in traitData) {
tryCatch({
TraitOfInterest_list_length <- length(TraitOfInterest_list)
#print(paste(TraitOfInterest, "Correlation", rownames(ModsOfInterst[[TraitOfInterest]])))
# Specific column name and row names to subset by
column_name <- colnames(ModsOfInterst[[TraitOfInterest]])
row_names_to_subset <- rownames(ModsOfInterst[[TraitOfInterest]])
# Function to subset and bind data frames
subset_and_bind <- function(df) {
subset_df <- df[row_names_to_subset, column_name, drop = FALSE]
return(subset_df)}
subset_dataframes <- lapply(Traits_corr, subset_and_bind)
subset_dataframes <- lapply(names(subset_dataframes), function(item_name) {
df <- subset_dataframes[[item_name]]
colnames(df) <- paste(colnames(df), item_name, sep = "_")
return(df)})
result_dataframe <- do.call(cbind, subset_dataframes)
result_dataframe <- dplyr::left_join(result_dataframe %>%
mutate(rowname = rownames(.)), module_size |> mutate(rowname = paste("ME", Module, sep="")))
#print(head(result_dataframe))
#############################################################################
#Creating GeneMembership and GeneSignificance Dataframes with GeneAnnotation#
#############################################################################
result_dataframe_list <- list()
for (i in result_dataframe$rowname) {
aa <- length(result_dataframe_list)
MODULE <- i
g <- i
gg <- sub("ME", "", g); print(gg)
modNames = names(MEs) #substring(names(MEs), 2)
#print(paste(MODULE, "vs", TraitOfInterest, sep=" "))
column = match(MODULE, modNames);
moduleGenes <- moduleLabels[moduleLabels == sub("ME","", MODULE)]
nSamples = nrow(omics_data)
# Define variable K containing the K column of datTrait
TraitOfInterest_dat = as.data.frame(SAM[TraitOfInterest]);
names(TraitOfInterest_dat) = paste(TraitOfInterest)
# names (colors) of the modules
#Calculate Module Membership
geneModuleMembership = as.data.frame(cor(omics_data, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
#Calculate Gene Significance
geneTraitSignificance = as.data.frame(cor(omics_data, TraitOfInterest_dat, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
names(geneTraitSignificance) = paste("GS.", names(TraitOfInterest_dat), sep="");
names(GSPvalue) = paste("p.GS.", names(TraitOfInterest_dat), sep="");
#Make all positive Dataframe for <- Not for this Plot!
GenSig <-geneTraitSignificance
GenSig$Geneid <-row.names(GenSig)
#GenSig[,names(GenSig) %in% paste("GS.", TraitOfInterest, sep="")] <-abs(GenSig[,names(GenSig) %in%
# paste("GS.", TraitOfInterest, sep="")])
GenSig <- dplyr::left_join(GenSig, GSPvalue %>% mutate(Geneid = rownames(.)))
#head(GenSig)
if (Type != "SSU") {
GenSig <- dplyr::left_join(GenSig[GenSig$Geneid %in% names(moduleGenes),], ANNO[[paste("GeneAnno_", i, sep="")]])
}
if (Type == "SSU") {
GenSig <- GenSig[GenSig$Geneid %in% names(moduleGenes),]
}
A <- dplyr::left_join(GenSig, abs(geneModuleMembership[names(moduleGenes),
paste("MM", i, sep=""), drop = FALSE]) %>% mutate(Geneid = rownames(.)))
names(A)[names(A) == paste("MM", i, sep="")] <- "ModuleMembership"
#A <- as.data.frame(cbind(abs(geneModuleMembership[names(moduleGenes), paste("MM", i, sep="")]), A))
A$Module_color <- result_dataframe[result_dataframe$rowname == i,]$Module_color
#head(A)
result_dataframe_list[[aa+1]] <- A
names(result_dataframe_list)[[aa+1]] <- paste(i, TraitOfInterest, sep="_")
}
B <- do.call(rbind, result_dataframe_list)
B$Modules <- sub("\\..*","",rownames(B)); rownames(B) <-NULL
#head(B)
connectivity <- cbind(degrees, moduleColors, geneModuleMembership)
BB <- left_join(B,connectivity %>% mutate(Geneid = rownames(.)))
BBB <- BB[BB$Module_color !="grey",]
head(BBB)
BBB$Modules <- sub(paste("_", TraitOfInterest, sep=""), "", BBB$Modules)
BBB$Modules <- sub("ME", "", BBB$Modules)
names(BBB) <- gsub(TraitOfInterest, "", names(BBB))
BBB$TraitOfInterest <- TraitOfInterest
if (Type != "SSU") {
BBB$Modules <- paste("RNA", BBB$Modules, sep="")
} else if (Type == "SSU") {
BBB$Modules <- paste("SSU", BBB$Modules, sep="")
}
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
######
#Plot#
######
require(cowplot)
tryCatch({
#if (OperatingSystem == "Mac" ) {quartz() }
FILENAME <- paste(paste(Species, Year, Season, Analysis, Tissue, Type, "MultiModuleScatter-kwithin", TraitOfInterest, sep="_"),
".png", sep="")
plot <- ggplot(data = BBB, aes(x = ModuleMembership, y = BBB[[1]])) +
geom_point(aes(colour = Module_color, size = kWithin), alpha = 0.5) +
scale_size_continuous(range = c(0.4, 4)) + # Adjust the size range
xlab(paste("Module Membership", sep = " ")) +
ylab(paste("Gene significance for", TraitOfInterest, sep = " ")) +
labs(title = paste(Tissue, Type, "Module Membership vs. Gene Significance"),
color = "sig. Modules") +
scale_color_identity(guide = 'legend',
breaks = unique(BBB$Module_color),
labels = unique(BBB$Modules)) +
geom_hline(yintercept = 0, color = "red", linetype = "dashed")
if (Type != "SSU") {
plot <- plot + ggrepel::geom_text_repel(
data = BBB, size = 2.5, aes(label = GeneSymbolHS), color = OutlineColor)
} else if (Type == "SSU") {
plot <- plot + ggrepel::geom_text_repel(
data = BBB, size = 2.5, aes(label = Geneid), color = OutlineColor)
}
plot <- plot + theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme(
panel.grid.major = element_line(colour = "grey50"),
panel.grid.minor = element_line(colour = "grey50"),
legend.position = "right")
ggsave(plot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 5, height = 5)
##################
#Plot a selection#
##################
if (TraitOfInterest %in% c("O2", "Salinity", "SecchiDepth")) {
plot(plot)
} else {
#print("Plots are saved to the pathPlots")
}
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
TraitOfInterest_list[[TraitOfInterest_list_length+1]] <- BBB
names(TraitOfInterest_list)[[TraitOfInterest_list_length+1]] <- TraitOfInterest
}
scatter_list[[a+1]] <- TraitOfInterest_list
names(scatter_list)[[a+1]] <- paste("Scatter", Type, Tissue, sep="_")
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "0"
## ERROR : replacement has 1 row, data has 0
## [1] "2"
## [1] "8"
## [1] "5"
## [1] "0"
## [1] "2"
## [1] "8"
## [1] "5"
## [1] "7"
## [1] "0"
## ERROR : 'names' attribute [1] must be the same length as the vector [0]
## ERROR : 'names' attribute [1] must be the same length as the vector [0]
## [1] "8"
## [1] "5"
## [1] "7"
## [1] "9"
## [1] "6"
## [1] "11"
## [1] "0"
## [1] "9"
## [1] "2"
## [1] "0"
## [1] "5"
## [1] "15"
## [1] "9"
## [1] "27"
## [1] "12"
## [1] "13"
## [1] "22"
## [1] "8"
## [1] "33"
## [1] "25"
## [1] "38"
## [1] "4"
## [1] "13"
## [1] "3"
## [1] "23"
## [1] "25"
## [1] "38"
## [1] "5"
## [1] "4"
## [1] "37"
## [1] "12"
## [1] "13"
## [1] "3"
## [1] "23"
## [1] "14"
## [1] "28"
## [1] "30"
## [1] "2"
## [1] "31"
## [1] "16"
## [1] "30"
## [1] "2"
## [1] "11"
## [1] "7"
## [1] "10"
## [1] "25"
## [1] "38"
## [1] "4"
## [1] "21"
## [1] "28"
## [1] "34"
## [1] "2"
## [1] "11"
## [1] "7"
## [1] "10"
## [1] "5"
## [1] "37"
## [1] "12"
## [1] "13"
## [1] "22"
## [1] "34"
## [1] "1"
## [1] "6"
## [1] "12"
## [1] "13"
## [1] "22"
## [1] "3"
## [1] "23"
## [1] "24"
## [1] "9"
## [1] "29"
## [1] "30"
## [1] "11"
## [1] "22"
## [1] "3"
## [1] "25"
## [1] "1"
## [1] "7"
## [1] "14"
## [1] "3"
## [1] "1"
## [1] "7"
## [1] "14"
## [1] "3"
## [1] "9"
## [1] "1"
## [1] "6"
## [1] "10"
## [1] "19"
## [1] "30"
## [1] "25"
## [1] "24"
## [1] "12"
## [1] "0"
## [1] "14"
## [1] "2"
## [1] "8"
## [1] "29"
## [1] "0"
## [1] "6"
## [1] "10"
## [1] "36"
## [1] "18"
## [1] "38"
## [1] "8"
## [1] "12"
## [1] "11"
## [1] "25"
## [1] "0"
## [1] "9"
## [1] "1"
## [1] "6"
###########
#Save Data#
###########
saveRDS(scatter_list, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "scatter_list", Date, sep="_"), ".rds", sep=""))))
scatter_list <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "scatter_list", Date, sep="_"), ".rds", sep=""))))
################################################
#Select Immune-relevant Modules in Host-Tissues#
################################################
#Modules with KEGG & GO Enrichments related to immune response
Liver_Immune <- c(2, 3, 4, 7, 8)
Gill_Immune <- c(1, 2, 7, 11, 14, 16, 17)
ImmuneList <- list("Gill_Immune" = Gill_Immune, "Liver_Immune" = Liver_Immune)
immune_list <- list()
immune_scatter_list <- list()
for (WGCNAset in names(WGCNA_list)[grepl("RNA_", names(WGCNA_list))]) {
require(WGCNA)
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
RNA_MEs <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("MEs", names(WGCNA_list[[WGCNAset]]))][[1]])
SSU <- WGCNA_list[["SSU_Gill_WGCNA"]]
omics_data <- as.data.frame(WGCNA_list[["SSU_Gill_WGCNA"]][grepl("omics_data",
names(WGCNA_list[["SSU_Gill_WGCNA"]]))][[1]])
omics_data <- omics_data %>% dplyr::arrange(factor(rownames(omics_data), levels = rownames(RNA_MEs)))
SAM <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("datTraits", names(WGCNA_list[[WGCNAset]]))][[1]])
SAM <-SAM %>% dplyr::arrange(factor(rownames(SAM), levels = rownames(RNA_MEs)))
ANNO <- WGCNA_list[[WGCNAset]][grepl("GeneAnno", names(WGCNA_list[[WGCNAset]]))]
moduleColors <- WGCNA_list[["SSU_Gill_WGCNA"]][grepl("moduleColors", names(WGCNA_list[["SSU_Gill_WGCNA"]]))][[1]]
moduleLabels <- WGCNA_list[["SSU_Gill_WGCNA"]][grepl("moduleLabels", names(WGCNA_list[["SSU_Gill_WGCNA"]]))][[1]]
RNA_MEs_Immune <- RNA_MEs[names(RNA_MEs) %in% paste("ME", ImmuneList[[paste(Tissue, "Immune", sep="_")]], sep="")]
names(RNA_MEs_Immune) <- paste("RNA", sub("ME", "", names(RNA_MEs_Immune)), sep="")
omics_data <- omics_data[rownames(omics_data) %in% rownames(RNA_MEs_Immune),]
SSU_MEs <- SSU$SSU_Gill_MEs[rownames(SSU$SSU_Gill_MEs) %in% rownames(RNA_MEs_Immune),]
SSU_MEs <-SSU_MEs%>% dplyr::arrange(factor(rownames(SSU_MEs), levels = rownames(RNA_MEs)))
names(SSU_MEs) <- paste("SSU", sub("ME", "", names(SSU_MEs)), sep="")
module_size<- as.data.frame(table(moduleLabels))
colnames(module_size) <- c("Module", "Size")
module_size %>% dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
#################
#Cor-Mat RNA-SSU#
#################
#print("Cor-Mat RNA-SSU")
p.value_matr <- corr.value_matr <- matrix(ncol = ncol(SSU_MEs), nrow = ncol(RNA_MEs),
dimnames = list(colnames(RNA_MEs), colnames(SSU_MEs)))
for(ii in 1:ncol(RNA_MEs)){
for(j in 1:ncol(SSU_MEs)){
cor.res <- cor.test(RNA_MEs[,ii], SSU_MEs[,j])
p.value_matr[ii, j] <- cor.res$p.value
corr.value_matr[ii, j] <- cor.res$estimate
}
}
# Correct for number of tests
p.value_matr.adjust <- p.adjust(p.value_matr, method = "fdr")
dim(p.value_matr.adjust) <- dim(p.value_matr)
dimnames(p.value_matr.adjust) <- list(colnames(RNA_MEs), colnames(SSU_MEs))
# Collect all results into a list.
SSU_corr_RNA <- list(p_value = p.value_matr,
p_value_adj = p.value_matr.adjust,
correlation = corr.value_matr)
Sig_SSUs <-
as.data.frame(SSU_corr_RNA$p_value[paste("ME", ImmuneList[[paste(Tissue, "Immune", sep="_")]], sep=""),]) %>%
dplyr::select_if(~ any(. < 0.05))
Sig_SSUs<- Sig_SSUs[names(Sig_SSUs) != "SSU0"]
############################################
#Create Taxa Significance to Immune Modules#
############################################
result_dataframe_list <- list()
for (i in names(SSU_MEs)) {
for (j in names(RNA_MEs_Immune)){
aa <- length(result_dataframe_list)
MODULE <- i
g <- i
gg <- sub("SSU", "", g); print(gg)
modNames = names(SSU_MEs)
#print(paste(MODULE, "vs", j, sep=" "))
column = match(MODULE, modNames);
moduleGenes <- moduleLabels[moduleLabels == sub("SSU","", MODULE)]
nSamples = nrow(omics_data)
# Define variable K containing the K column of datTrait
TraitOfInterest_dat = as.data.frame(RNA_MEs_Immune[j])
names(TraitOfInterest_dat) = paste(j)
# names (colors) of the modules
#Calculate Module Membership
geneModuleMembership = as.data.frame(cor(omics_data, SSU_MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
#Calculate Gene Significance
geneTraitSignificance = as.data.frame(cor(omics_data, TraitOfInterest_dat, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
names(geneTraitSignificance) = paste("GS.")
names(GSPvalue) = paste("p.GS.")
#Make all positive Dataframe for <- Not for this Plot!
GenSig <-geneTraitSignificance
GenSig$Geneid <-row.names(GenSig)
#GenSig[,names(GenSig) %in% paste("GS.", TraitOfInterest, sep="")] <-abs(GenSig[,names(GenSig) %in%
# paste("GS.", TraitOfInterest, sep="")])
GenSig <- dplyr::left_join(GenSig, GSPvalue %>% mutate(Geneid = rownames(.)))
#head(GenSig)
GenSig <- GenSig[GenSig$Geneid %in% names(moduleGenes),]
A <- dplyr::left_join(GenSig, abs(geneModuleMembership[names(moduleGenes),
paste("MM", i, sep=""), drop = FALSE]) %>% mutate(Geneid = rownames(.)))
names(A)[names(A) == paste("MM", i, sep="")] <- "ModuleMembership"
A$SSU_Module <- paste(i)
A$RNA_Module <- paste(j)
A <- dplyr::left_join(A,module_size[module_size$Module %in% sub("SSU","", i),] |>
mutate(SSU_Module = paste("SSU", Module, sep="")))
#head(A)
result_dataframe_list[[aa+1]] <- A
names(result_dataframe_list)[[aa+1]] <- paste(i, j, sep="_")
}
}
B <- do.call(rbind, result_dataframe_list)
B$Modules <- sub("\\..*","",rownames(B)); rownames(B) <-NULL
#head(B)
geneModuleMembership = as.data.frame(cor(as.data.frame(WGCNA_list[["SSU_Gill_WGCNA"]][grepl("omics_data",
names(WGCNA_list[["SSU_Gill_WGCNA"]]))][[1]]),
as.data.frame(WGCNA_list[["SSU_Gill_WGCNA"]][grepl("MEs",
names(WGCNA_list[["SSU_Gill_WGCNA"]]))][[1]]), use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
connectivity <- cbind(degrees_list$degrees_Gill_SSU, moduleColors, geneModuleMembership)
BB <- left_join(B,connectivity %>% mutate(Geneid = rownames(.)))
BBB <- BB[BB$SSU_Module %in% names(Sig_SSUs),]
for (k in unique(BBB$RNA_Module)) {
require(cowplot)
tryCatch({
FILENAME <-paste("WGCNA-ModuleScatter-kwithin_SSU_Immune",Species,Tissue, k, sep="_")
BBBB <- BBB[BBB$RNA_Module %in% k, ]
SSU_Counts <- SSU$SSU_Tax$All[-which(names(SSU$SSU_Tax$All) %in% "values")]
BBBBB <- left_join(BBBB, SSU_Counts[rownames(SSU_Counts) %in% BBBB$Geneid, ] %>%
mutate(Geneid = rownames(.)))
a <- length(immune_list)
immune_list[[a+1]] <- BBBBB
names(immune_list)[[a+1]] <- paste("GS", Tissue, k, sep="_")
plot <- ggplot(data = BBBB, aes(x = ModuleMembership, y = GS.)) +
geom_point(aes(colour = Module_color, size = kWithin), alpha = 0.7) +
scale_size_continuous(range = c(1, 8)) + # Adjust the size range
xlab(paste("Module Membership", sep = " ")) +
ylab(paste("Node significance for", Tissue, k, sep = " ")) +
labs(title = paste("Module Membership vs. Node Significance to", Tissue, k),
color = "sig. Modules") +
scale_color_identity(guide = 'legend',
breaks = unique(BBBB$Module_color),
labels = unique(BBBB$SSU_Module)) +
geom_hline(yintercept = 0, color = "red", linetype = "dashed")
plot <- plot + ggrepel::geom_text_repel(
data = BBBB, size = 2.5, aes(label = Geneid), color = "grey10")
plot <- plot + theme_minimal() +
#atheme +
atheme +
theme(axis.title.x = element_blank()) +
theme(
panel.grid.major = element_line(colour = "grey50"),
panel.grid.minor = element_line(colour = "grey50"),
legend.position = "right")
#ggsave(plot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 7, height = 7)
immune_scatter_list_length <- length(immune_scatter_list)
immune_scatter_list[[immune_scatter_list_length+1]] <- plot
names(immune_scatter_list)[[immune_scatter_list_length+1]] <- paste("GS", Tissue, k, sep="_")
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
}
## [1] "4"
## [1] "4"
## [1] "4"
## [1] "4"
## [1] "4"
## [1] "4"
## [1] "4"
## [1] "1"
## [1] "1"
## [1] "1"
## [1] "1"
## [1] "1"
## [1] "1"
## [1] "1"
## [1] "10"
## [1] "10"
## [1] "10"
## [1] "10"
## [1] "10"
## [1] "10"
## [1] "10"
## [1] "9"
## [1] "9"
## [1] "9"
## [1] "9"
## [1] "9"
## [1] "9"
## [1] "9"
## [1] "2"
## [1] "2"
## [1] "2"
## [1] "2"
## [1] "2"
## [1] "2"
## [1] "2"
## [1] "3"
## [1] "3"
## [1] "3"
## [1] "3"
## [1] "3"
## [1] "3"
## [1] "3"
## [1] "6"
## [1] "6"
## [1] "6"
## [1] "6"
## [1] "6"
## [1] "6"
## [1] "6"
## [1] "11"
## [1] "11"
## [1] "11"
## [1] "11"
## [1] "11"
## [1] "11"
## [1] "11"
## [1] "12"
## [1] "12"
## [1] "12"
## [1] "12"
## [1] "12"
## [1] "12"
## [1] "12"
## [1] "8"
## [1] "8"
## [1] "8"
## [1] "8"
## [1] "8"
## [1] "8"
## [1] "8"
## [1] "5"
## [1] "5"
## [1] "5"
## [1] "5"
## [1] "5"
## [1] "5"
## [1] "5"
## [1] "7"
## [1] "7"
## [1] "7"
## [1] "7"
## [1] "7"
## [1] "7"
## [1] "7"
## [1] "0"
## [1] "0"
## [1] "0"
## [1] "0"
## [1] "0"
## [1] "0"
## [1] "0"
## [1] "4"
## [1] "4"
## [1] "4"
## [1] "4"
## [1] "4"
## [1] "1"
## [1] "1"
## [1] "1"
## [1] "1"
## [1] "1"
## [1] "10"
## [1] "10"
## [1] "10"
## [1] "10"
## [1] "10"
## [1] "9"
## [1] "9"
## [1] "9"
## [1] "9"
## [1] "9"
## [1] "2"
## [1] "2"
## [1] "2"
## [1] "2"
## [1] "2"
## [1] "3"
## [1] "3"
## [1] "3"
## [1] "3"
## [1] "3"
## [1] "6"
## [1] "6"
## [1] "6"
## [1] "6"
## [1] "6"
## [1] "11"
## [1] "11"
## [1] "11"
## [1] "11"
## [1] "11"
## [1] "12"
## [1] "12"
## [1] "12"
## [1] "12"
## [1] "12"
## [1] "8"
## [1] "8"
## [1] "8"
## [1] "8"
## [1] "8"
## [1] "5"
## [1] "5"
## [1] "5"
## [1] "5"
## [1] "5"
## [1] "7"
## [1] "7"
## [1] "7"
## [1] "7"
## [1] "7"
## [1] "0"
## [1] "0"
## [1] "0"
## [1] "0"
## [1] "0"
###########
#Save Data#
###########
saveRDS(immune_scatter_list, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "immune_scatter_list", Date, sep="_"), ".rds", sep=""))))
immune_scatter_list <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "immune_scatter_list", Date, sep="_"), ".rds", sep=""))))
#immune_scatter_list$GS_Gill_RNA7
Tissue="Gill"
GS_RNA_SSU <- immune_list$GS_Gill_RNA7
subset_df <- GS_RNA_SSU %>%
dplyr::group_by(SSU_Module) #%>%
# dplyr::filter(p.GS. < 0.05, GS. > 0.3)
#subset_df$Geneid
BacteriaHostCorrelatonPlot <- ggplot(data = subset_df, aes(x = ModuleMembership, y = GS.)) +
geom_point(aes(colour = Module_color, size = kWithin), alpha = 0.7) +
scale_size_continuous(range = c(1, 8)) + # Adjust the size range
xlab(paste("Module Membership", sep = " ")) +
ylab(paste("Correlation to", Tissue, subset_df$RNA_Module[1], sep = " ")) +
labs(title = paste("Bacterial abundance correlated to host Gill-", subset_df$RNA_Module[1],
sep=""),
color = "sig. Modules") +
scale_color_identity(guide = 'legend',
breaks = unique(subset_df$Module_color),
labels = unique(subset_df$SSU_Module)) +
geom_hline(yintercept = 0, color = "red", linetype = "dashed")
BacteriaHostCorrelatonPlot <- BacteriaHostCorrelatonPlot + ggrepel::geom_text_repel(
data = subset_df, size = 3, aes(label = Geneid), color = OutlineColor) #white
BacteriaHostCorrelatonPlot <- BacteriaHostCorrelatonPlot + theme_minimal() +
atheme+
#atheme +
theme(axis.title.x = element_blank()) +
theme(
panel.grid.major = element_line(colour = "grey50"),
panel.grid.minor = element_line(colour = "grey50"),
legend.position = "right",
legend.title = element_text( size = 10, face = "bold"),
legend.text = element_text(size = 12,face = "bold"),
axis.text.x.bottom = element_text(size = 10, face = "bold", angle = 45, hjust = 1),
axis.text.y.left = element_text(size = 10, face = "bold"))
ggsave(BacteriaHostCorrelatonPlot, filename = paste(paste(Species, Year, Season, Analysis, "GS_SSU", Tissue, subset_df$Modules[1], sep="_"), ".png", sep="") , path = pathPlots, device='png', dpi=300, width = 7, height = 5)
BacteriaHostCorrelatonPlot
immune_list_filtered <- list()
for (GS_Immune in names(immune_list)) {
require(phyloseq)
immune_list_filtered_length <- length(immune_list_filtered )
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", GS_Immune)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", GS_Immune)
GS_RNA_SSU <- immune_list[[GS_Immune]]
subset_df <- GS_RNA_SSU %>%
dplyr::group_by(SSU_Module) %>%
dplyr::filter(p.GS. < 0.05, GS. > 0.3)
subset_df <- as.data.frame(subset_df)
subset_df<- subset_df %>%
dplyr::arrange(Genus, desc(GS.)) #SSU_Module,
WF_counts <- as.data.frame(t(otu_table(pslist$ps_SLWF_21))) %>%
dplyr::select("WFSU21MLFL", "WFSU21SSFL", "WFSU21MGEB", "WFSU21BBEB") %>%
dplyr::mutate("ASV" = rownames(.))
subset_df<- left_join(subset_df,WF_counts )
ps_SLWF_21_relAbund <- pslist$ps_SLWF_21 %>%
transform_sample_counts(function(x) {x/sum(x)*100})
SSU_counts_rel <- as.data.frame(t(otu_table(ps_SLWF_21_relAbund)))
names(SSU_counts_rel) <- paste(names(SSU_counts_rel), "rel", sep="")
SSU_counts_rel <- SSU_counts_rel %>% mutate("ASV" = rownames(.)) %>% as.data.frame()
subset_df<- left_join(subset_df,SSU_counts_rel)
#############
#Save as CSV#
#############
write.csv2(as.data.frame(subset_df), file = paste0(file.path(path_Output_WGCNA,
paste(paste(Species, Year, Analysis, Tissue, paste(unique(subset_df$RNA_Module)), "SSU_vs_HostRNA_Correlation", sep="_"),
"csv", sep="."))))
immune_list_filtered[[immune_list_filtered_length+1]] <- subset_df
names(immune_list_filtered)[[immune_list_filtered_length+1]] <- paste(GS_Immune, "filtered", sep="_")
}
head(immune_list_filtered$GS_Gill_RNA7_filtered, 2)
## GS. Geneid p.GS. ModuleMembership
## 1 0.6516825 ASV755:Acidovorax 0.0013712607 0.8668113
## 2 0.7726654 ASV977:Acinetobacter.johnsonii 0.0000403711 0.9051066
## SSU_Module RNA_Module Module Size Module_color Modules kTotal kWithin
## 1 SSU2 RNA7 2 137 blue SSU2_RNA7 67.09077 31.74264
## 2 SSU3 RNA7 3 71 brown SSU3_RNA7 62.78187 14.19582
## kOut kDiff moduleColors MMSSU4 MMSSU1 MMSSU10 MMSSU9
## 1 35.34813 -3.605494 blue -0.4422262 -0.3039456 -0.1557292 0.2452120
## 2 48.58605 -34.390232 brown -0.5487518 -0.6473798 -0.5666529 0.4743031
## MMSSU2 MMSSU3 MMSSU6 MMSSU11 MMSSU12 MMSSU8 MMSSU5
## 1 0.8180733 0.5273622 -0.04585472 0.236837080 -0.2924437 -0.4951006 -0.5324342
## 2 0.7281118 0.7858923 0.12927332 0.003158459 -0.2007372 0.1007720 -0.1590337
## MMSSU7 MMSSU0 Kingdom Phylum Class
## 1 -0.0931968 0.7449031 Bacteria Proteobacteria Gammaproteobacteria
## 2 0.3305519 0.4692715 Bacteria Proteobacteria Gammaproteobacteria
## Order Family Genus Species SLSU21BBEB7
## 1 Burkholderiales Comamonadaceae Acidovorax <NA> 0
## 2 Pseudomonadales Moraxellaceae Acinetobacter johnsonii 0
## SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9 SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4
## 1 1 4 0 0 0 0
## 2 0 14 5 0 0 0
## SLSU21BBEB6 SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3 SLSU21MGEB4
## 1 0 0 0 0 0 0
## 2 0 1 0 0 0 0
## SLSU21MGEB5 SLSU21MLEB1 SLSU21MLEB2 SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7
## 1 1 10 67 11 6 0
## 2 4 2 62 31 19 10
## SLSU21SSEB2 SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7
## 1 0 0 0 0 0
## 2 12 1 1 0 0
## ASV WFSU21MLFL WFSU21SSFL WFSU21MGEB WFSU21BBEB
## 1 ASV755:Acidovorax 0 0 0 0
## 2 ASV977:Acinetobacter.johnsonii 2 0 0 0
## SLSU21BBEB7rel SLSU21MGEB7rel SLSU21MLEB9rel SLSU21SSEB9rel WFSU21MLFLrel
## 1 0 0.003649369 0.01481372 0.00000000 0.00000000
## 2 0 0.000000000 0.05184801 0.02192598 0.01404297
## WFSU21SSFLrel WFSU21MGEBrel WFSU21BBEBrel SLSU21BBEB1rel SLSU21BBEB2rel
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## SLSU21BBEB4rel SLSU21BBEB6rel SLSU21BBEB9rel SLSU21MGEB1rel SLSU21MGEB2rel
## 1 0 0 0.000000000 0 0
## 2 0 0 0.001410795 0 0
## SLSU21MGEB3rel SLSU21MGEB4rel SLSU21MGEB5rel SLSU21MLEB1rel SLSU21MLEB2rel
## 1 0 0 0.009203866 0.09588647 0.1843546
## 2 0 0 0.036815462 0.01917729 0.1705968
## SLSU21MLEB5rel SLSU21MLEB6rel SLSU21MLEB7rel SLSU21SSEB2rel SLSU21SSEB3rel
## 1 0.0374736 0.01884836 0.00000000 0.00000000 0.000000000
## 2 0.1056074 0.05968649 0.07567731 0.08673027 0.003523608
## SLSU21SSEB5rel SLSU21SSEB6rel SLSU21SSEB7rel
## 1 0.000000000 0 0
## 2 0.009316192 0 0
head(immune_list_filtered$GS_Gill_RNA11_filtered, 2)
## GS. Geneid p.GS. ModuleMembership SSU_Module
## 1 0.5442708 ASV1630:Acinetobacter 0.01074729 0.8241485 SSU2
## 2 0.5210892 ASV770:Acinetobacter 0.01542407 0.9366448 SSU2
## RNA_Module Module Size Module_color Modules kTotal kWithin kOut
## 1 RNA11 2 137 blue SSU2_RNA11 68.65226 32.89256 35.75970
## 2 RNA11 2 137 blue SSU2_RNA11 82.09797 41.28892 40.80905
## kDiff moduleColors MMSSU4 MMSSU1 MMSSU10 MMSSU9 MMSSU2
## 1 -2.8671343 blue -0.5720878 -0.5786070 -0.4655375 0.1941441 0.8269920
## 2 0.4798666 blue -0.5541365 -0.5499178 -0.3876547 0.3236342 0.9368385
## MMSSU3 MMSSU6 MMSSU11 MMSSU12 MMSSU8 MMSSU5 MMSSU7
## 1 0.7152524 0.04236895 0.13859668 -0.03180319 -0.09811695 -0.3302596 0.2211816
## 2 0.7174357 -0.08479305 0.03016264 -0.10556740 -0.25310100 -0.4841415 0.1338719
## MMSSU0 Kingdom Phylum Class Order
## 1 0.5059756 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## 2 0.7607497 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## Family Genus Species SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9
## 1 Moraxellaceae Acinetobacter <NA> 0 0 4
## 2 Moraxellaceae Acinetobacter <NA> 0 0 4
## SLSU21SSEB9 SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6 SLSU21BBEB9
## 1 1 0 0 0 0 0
## 2 0 0 0 0 0 0
## SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3 SLSU21MGEB4 SLSU21MGEB5 SLSU21MLEB1
## 1 0 0 0 0 0 2
## 2 0 0 0 0 0 4
## SLSU21MLEB2 SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2 SLSU21SSEB3
## 1 15 8 0 12 0 0
## 2 98 79 2 17 0 0
## SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7 ASV WFSU21MLFL
## 1 0 0 0 ASV1630:Acinetobacter 4
## 2 0 0 0 ASV770:Acinetobacter 1
## WFSU21SSFL WFSU21MGEB WFSU21BBEB SLSU21BBEB7rel SLSU21MGEB7rel SLSU21MLEB9rel
## 1 0 0 0 0 0 0.01481372
## 2 0 0 0 0 0 0.01481372
## SLSU21SSEB9rel WFSU21MLFLrel WFSU21SSFLrel WFSU21MGEBrel WFSU21BBEBrel
## 1 0.004385196 0.028085943 0 0 0
## 2 0.000000000 0.007021486 0 0 0
## SLSU21BBEB1rel SLSU21BBEB2rel SLSU21BBEB4rel SLSU21BBEB6rel SLSU21BBEB9rel
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## SLSU21MGEB1rel SLSU21MGEB2rel SLSU21MGEB3rel SLSU21MGEB4rel SLSU21MGEB5rel
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## SLSU21MLEB1rel SLSU21MLEB2rel SLSU21MLEB5rel SLSU21MLEB6rel SLSU21MLEB7rel
## 1 0.01917729 0.04127342 0.02725353 0.000000000 0.09081277
## 2 0.03835459 0.26965303 0.26912857 0.006282788 0.12865143
## SLSU21SSEB2rel SLSU21SSEB3rel SLSU21SSEB5rel SLSU21SSEB6rel SLSU21SSEB7rel
## 1 0 0 0 0 0
## 2 0 0 0 0 0
Venn_list_Gill<- list("Gill-RNA7" = unique(sub(".*:", "", immune_list_filtered$GS_Gill_RNA7_filtered$Geneid)), "Gill-RNA11" = unique(sub(".*:", "", immune_list_filtered$GS_Gill_RNA11_filtered$Geneid)))
Venn_Gill_GS_SSU<- ggVennDiagram::ggVennDiagram(Venn_list_Gill, edge_size=1, set_size = 5) + theme(legend.position = "none") +scale_x_continuous(expand = expansion(mult = .2)) + theme(panel.background = element_blank())
ggsave(Venn_Gill_GS_SSU, filename = paste(paste(Species, Year, Season, Analysis, "Venn_Gill_GS_SSU", Date, sep="_"), "Figure_4A.png", sep=""), path = pathPlots , device='png', dpi=300, width = 5, height = 5)
intersect(Venn_list_Gill$`Gill-RNA7`, Venn_list_Gill$`Gill-RNA11`)
## [1] "Acinetobacter.johnsonii" "Acinetobacter"
## [3] "Aeromonas" "Cetobacterium"
## [5] "Chryseobacterium" "Chryseobacterium.piscicola"
## [7] "Cyanobium PCC-6307" "Dinghuibacter"
## [9] "Enhydrobacter" "Legionella"
## [11] "Luteolibacter" "Methyloparacoccus"
## [13] "Polynucleobacter" "Shewanella.baltica"
## [15] "Shewanella" "Shewanella.putrefaciens"
## [17] "Stenotrophomonas" "Terrimicrobium"
## [19] "Verticiella" "Methylococcaceae"
## [21] "Gemmataceae" "Verrucomicrobiaceae"
## [23] "DEV007"
setdiff(Venn_list_Gill$`Gill-RNA7`, Venn_list_Gill$`Gill-RNA11`)
## [1] "Acidovorax" "Acinetobacter.tjernbergiae"
## [3] "Acinetobacter.lwoffii" "Aeromonas.popoffii"
## [5] "Aeromonas.bivalvium" "Alkanindiges"
## [7] "CL500-29 marine group" "Candidatus Megaira"
## [9] "Candidatus Trichorickettsia" "Chryseobacterium.haifense"
## [11] "LD29" "Microcystis PCC-7914"
## [13] "Planktothrix NIVA-CYA 15" "Plesiomonas.shigelloides"
## [15] "Polynucleobacter.asymbioticus" "Pseudomonas"
## [17] "Psychrobacter" "Roseomonas"
## [19] "Legionellaceae" "Paracaedibacteraceae"
## [21] "Rhizobiales Incertae Sedis"
setdiff(Venn_list_Gill$`Gill-RNA11`, Venn_list_Gill$`Gill-RNA7`)
## [1] "Aeromonas.sobria" "Altererythrobacter" "Arenimonas"
## [4] "Ellin6067" "Erythrobacter" "Exiguobacterium"
## [7] "Paracoccus" "Subgroup 10" "TRA3-20"
## [10] "Hyphomicrobiaceae" "Ardenticatenaceae" "SC-I-84"
## [13] "Anaerolineaceae" "Pedosphaeraceae"
degrees_list <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "degrees_list", Date, sep="_"), ".rds", sep=""))))
annotated_scatter_list <- list()
for (WGCNAset in names(WGCNA_list)) {
tryCatch({
a <- length(annotated_scatter_list)
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
omics_data <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("omics_data", names(WGCNA_list[[WGCNAset]]))][[1]])
MEs <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("MEs", names(WGCNA_list[[WGCNAset]]))][[1]])
SAM <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("datTraits", names(WGCNA_list[[WGCNAset]]))][[1]])
if (Type != "SSU") {
ANNO <- WGCNA_list[[WGCNAset]][grepl("GeneAnno", names(WGCNA_list[[WGCNAset]]))][[1]]
}
moduleColors <- WGCNA_list[[WGCNAset]][grepl("moduleColors", names(WGCNA_list[[WGCNAset]]))][[1]]
moduleLabels <- WGCNA_list[[WGCNAset]][grepl("moduleLabels", names(WGCNA_list[[WGCNAset]]))][[1]]
module_size<- as.data.frame(table(moduleLabels))
colnames(module_size) <- c("Module", "Size")
module_size %>% dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
degrees <- degrees_list[[paste("degrees", Tissue, Type, sep="_")]]
###############################################################################
#Create Dataframes of significant correlation between RNAModules and TraitData#
###############################################################################
InterestingComparison <- ModsOfInterst_list[[grep(paste(Tissue, Type, sep="_"), names(ModsOfInterst_list))]]
InterestingComparison <- lapply(InterestingComparison, rownames_to_column)
InterestingComparison <- do.call(rbind, lapply(names(InterestingComparison), function(name) {
data.frame(name, InterestingComparison[[name]], stringsAsFactors = FALSE)}))
names(InterestingComparison) <- c("variable", "module", "correlation", "p_value")
InterestingComparison <- InterestingComparison[InterestingComparison$module != "ME0", ]
#InterestingComparison$module <- paste(sub("ME",Type, InterestingComparison$module))
TraitOfInterest_list <- list()
for (TraitOfInterest in unique(InterestingComparison$variable)) {
TraitOfInterest_list_length <- length(TraitOfInterest_list)
#############################################################################
#Creating GeneMembership and GeneSignificance Dataframes with GeneAnnotation#
#############################################################################
result_dataframe <- InterestingComparison[InterestingComparison$variable %in% TraitOfInterest,]
result_dataframe_list <- list()
for (i in result_dataframe$module) {
aa <- length(result_dataframe_list)
MODULE <- i
g <- i
gg <- sub("ME", "", g); print(gg)
modNames = names(MEs) #substring(names(MEs), 2)
#print(paste(MODULE, "vs", TraitOfInterest, sep=" "))
column = match(MODULE, modNames);
moduleGenes <- moduleLabels[moduleLabels == sub("ME","", MODULE)]
nSamples = nrow(omics_data)
# Define variable K containing the K column of datTrait
TraitOfInterest_dat = as.data.frame(SAM[TraitOfInterest]);
names(TraitOfInterest_dat) = paste(TraitOfInterest)
# names (colors) of the modules
#Calculate Module Membership
geneMM = as.data.frame(cor(omics_data, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneMM), nSamples));
names(geneMM) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
geneModuleMembership<- geneMM[,names(geneMM) %in% paste("MM", MODULE, sep=""), drop = F]
MMPvalue <- MMPvalue[,names(MMPvalue) %in% paste("p.MM", MODULE, sep=""), drop = F]
geneModuleMembership <- left_join(geneModuleMembership %>% mutate(Geneid = rownames(.)), MMPvalue %>% mutate(Geneid = rownames(.)))
#Calculate Gene Significance
geneTraitSignificance = as.data.frame(cor(omics_data, TraitOfInterest_dat, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
names(geneTraitSignificance) = paste("GS.", names(TraitOfInterest_dat), sep="");
names(GSPvalue) = paste("p.GS.", names(TraitOfInterest_dat), sep="");
#Make all positive Dataframe for <- Not for this Plot!
GenSig <-geneTraitSignificance
GenSig$Geneid <-row.names(GenSig)
#GenSig[,names(GenSig) %in% paste("GS.", TraitOfInterest, sep="")] <-abs(GenSig[,names(GenSig) %in%
# paste("GS.", TraitOfInterest, sep="")])
GenSig <- dplyr::left_join(GenSig, GSPvalue %>% mutate(Geneid = rownames(.)))
#head(GenSig)
if (Type != "SSU") {
GenSig <- dplyr::left_join(GenSig[GenSig$Geneid %in% names(moduleGenes),], ANNO[[paste("GeneAnno_", i, sep="")]])
}
if (Type == "SSU") {
GenSig <- GenSig[GenSig$Geneid %in% names(moduleGenes),]
}
A <- dplyr::left_join(GenSig, geneModuleMembership[geneModuleMembership$Geneid %in% names(moduleGenes),])
names(A)[names(A) == paste("MM", i, sep="")] <- "ModuleMembership"
names(A)[names(A) == paste("p.MM", i, sep="")] <- "p.ModuleMembership"
A$Module <- MODULE
A$Module_color <- module_size[module_size$Module %in% sub("ME", "", MODULE),]$Module_color
A$Size <- module_size[module_size$Module %in% sub("ME", "", MODULE),]$Size
#head(A)
result_dataframe_list[[aa+1]] <- A
names(result_dataframe_list)[[aa+1]] <- paste(i, TraitOfInterest, sep="_")
}
B <- do.call(rbind, result_dataframe_list)
B$Modules <- sub("\\..*","",rownames(B)); rownames(B) <-NULL
#head(B)
connectivity <- cbind(degrees, moduleColors, names(moduleLabels), geneMM)
connectivity <- connectivity[,names(connectivity) %in% names(degrees)]
BB <- left_join(B,connectivity %>% mutate(Geneid = rownames(.)))
BBB <- BB[BB$Module_color !="grey",]
head(BBB)
BBB$Modules <- sub(paste("_", TraitOfInterest, sep=""), "", BBB$Modules)
BBB$Modules <- sub("ME", "", BBB$Modules)
if (Type != "SSU") {
BBB$Modules <- paste("RNA", BBB$Modules, sep="")
} else if (Type == "SSU") {
BBB$Modules <- paste("SSU", BBB$Modules, sep="")
}
######
#Plot#
######
for (Mods in unique(BBB$Modules)) {
BBBB <- BBB[BBB$Modules %in% Mods,]
require(cowplot)
tryCatch({
#if (OperatingSystem == "Mac" ) {quartz() }
FILENAME <-paste(paste(Species, Year, Season, Analysis, Tissue, Type,"ModuleScatter-kwithin", TraitOfInterest, Mods, sep="_"),".png", sep="")
plot <- ggplot(data = BBBB, aes(x = ModuleMembership, y = BBBB[[paste("GS.",TraitOfInterest, sep="")]])) +
geom_point(aes(colour = Module_color, size = kWithin), alpha = 0.5) +
scale_size_continuous(range = c(0.4, 4)) + # Adjust the size range
xlab(paste("Module Membership", sep = " ")) +
ylab(paste("Gene significance for", TraitOfInterest, sep = " ")) +
labs(title = paste(Tissue, Mods, "Module Membership", "vs.", "Gene Significance"),
color = "sig. Modules") +
scale_color_identity(guide = 'legend',
breaks = unique(BBBB$Module_color),
labels = unique(BBBB$Modules)) +
geom_hline(yintercept = 0, color = "red", linetype = "dashed")
if (Type != "SSU") {
plot <- plot + ggrepel::geom_text_repel(
data = BBBB, size = 2.5, aes(label = GeneSymbolHS), color = OutlineColor)
} else if (Type == "SSU") {
plot <- plot + ggrepel::geom_text_repel(
data = BBBB, size = 2.5, aes(label = Geneid), color = OutlineColor)
}
plot <- plot + theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme(
panel.grid.major = element_line(colour = "grey50"),
panel.grid.minor = element_line(colour = "grey50"),
legend.position = "right")
ggsave(plot, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 5, height = 5)
##################
#Plot a selection#
##################
if (Mods %in% c("RNA2")) {
plot(plot)
} else if (Mods %in% c("SSU2")) {
plot(plot)
} else {
#print("Plots are saved to the pathPlots")
}
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")}) }
TraitOfInterest_list[[TraitOfInterest_list_length+1]] <- BBB
names(TraitOfInterest_list)[[TraitOfInterest_list_length+1]] <- TraitOfInterest
}
annotated_scatter_list[[a+1]] <- TraitOfInterest_list
names(annotated_scatter_list)[[a+1]] <- paste("Scatter", Type, Tissue, sep="_")
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "2"
## [1] "8"
## [1] "5"
## [1] "2"
## [1] "8"
## [1] "5"
## [1] "7"
## [1] "8"
## [1] "5"
## [1] "7"
## [1] "9"
## [1] "6"
## [1] "11"
## [1] "9"
## [1] "2"
## [1] "5"
## [1] "15"
## [1] "9"
## [1] "27"
## [1] "12"
## [1] "13"
## [1] "22"
## [1] "8"
## [1] "33"
## [1] "25"
## [1] "38"
## [1] "4"
## [1] "13"
## [1] "3"
## [1] "23"
## [1] "25"
## [1] "38"
## [1] "5"
## [1] "4"
## [1] "37"
## [1] "12"
## [1] "13"
## [1] "3"
## [1] "23"
## [1] "5"
## [1] "21"
## [1] "37"
## [1] "12"
## [1] "3"
## [1] "19"
## [1] "14"
## [1] "28"
## [1] "30"
## [1] "2"
## [1] "31"
## [1] "16"
## [1] "30"
## [1] "2"
## [1] "11"
## [1] "7"
## [1] "10"
## [1] "25"
## [1] "38"
## [1] "4"
## [1] "21"
## [1] "28"
## [1] "34"
## [1] "2"
## [1] "11"
## [1] "7"
## [1] "10"
## [1] "5"
## [1] "37"
## [1] "12"
## [1] "13"
## [1] "22"
## [1] "34"
## [1] "1"
## [1] "6"
## [1] "12"
## [1] "13"
## [1] "22"
## [1] "3"
## [1] "23"
## [1] "24"
## [1] "9"
## [1] "29"
## [1] "30"
## [1] "11"
## [1] "22"
## [1] "3"
## [1] "25"
## [1] "1"
## [1] "7"
## [1] "14"
## [1] "3"
## [1] "1"
## [1] "7"
## [1] "14"
## [1] "3"
## [1] "1"
## [1] "6"
## [1] "10"
## [1] "2"
## [1] "33"
## [1] "29"
## [1] "3"
## [1] "25"
## [1] "9"
## [1] "1"
## [1] "6"
## [1] "10"
## [1] "19"
## [1] "30"
## [1] "25"
## [1] "24"
## [1] "12"
## [1] "14"
## [1] "2"
## [1] "8"
## [1] "29"
## [1] "6"
## [1] "10"
## [1] "36"
## [1] "18"
## [1] "38"
## [1] "8"
## [1] "12"
## [1] "11"
## [1] "25"
## [1] "9"
## [1] "1"
## [1] "6"
###########
#Save Data#
###########
saveRDS(annotated_scatter_list, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "annotated_scatter_list", Date, sep="_"), ".rds", sep=""))))
# scatter_list <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "annotated_scatter_list", Date, sep="_"), ".rds", sep=""))))
# A correlation coefficient of . 10 is thought to represent a weak or small association; a correlation coefficient of . 30 is considered a moderate correlation; and a correlation coefficient of . 50 or larger is thought to represent a strong or large correlation.
################
#ORA with plots# Loop
################
st_Gill_RNA <- 6
st_Liver_RNA <- 3
st_Gill_SSU <- 6
WGCNA_list <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, Date, sep="_"), ".rds", sep=""))))
degrees_list <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "degrees_list", Date, sep="_"), ".rds", sep=""))))
#TPM Data
tpms <- list("tpmGill "= tpm_RNA_Gill, "tpmLiver" = tpm_RNA_Liver)
Corr_Filter <- 0.8
ORA_list <- list()
for (WGCNAset in names(WGCNA_list)[grepl("RNA", names(WGCNA_list))]) {
tryCatch({
require(tidyverse); require(WGCNA)
require(clusterProfiler)
print(WGCNAset)
ORA_list_length <- length(ORA_list)
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
omics_data <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("omics_data", names(WGCNA_list[[WGCNAset]]))][[1]])
MEs <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("MEs", names(WGCNA_list[[WGCNAset]]))][[1]])
SAM <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("datTraits", names(WGCNA_list[[WGCNAset]]))][[1]])
TPM <- tpms[[names(tpms)[grepl(Tissue, names(tpms))]]]
TPM <- TPM[names(TPM) %in% rownames(SAM)]
rownames(TPM) <-gsub("[[:punct:]&&[^_]]|-", ".", rownames(TPM))
if (Type != "SSU") {
ANNO <- WGCNA_list[[WGCNAset]][grepl("GeneAnno", names(WGCNA_list[[WGCNAset]]))][[1]]
}
moduleColors <- WGCNA_list[[WGCNAset]][grepl("moduleColors", names(WGCNA_list[[WGCNAset]]))][[1]]
moduleLabels <- WGCNA_list[[WGCNAset]][grepl("moduleLabels", names(WGCNA_list[[WGCNAset]]))][[1]]
module_size<- as.data.frame(table(moduleLabels))
colnames(module_size) <- c("Module", "Size")
module_size %>% dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
degrees <- degrees_list[[paste("degrees", Tissue, Type, sep="_")]]
#############################################################################
#Creating GeneMembership and GeneSignificance Dataframes with GeneAnnotation#
#############################################################################
result_dataframe_list <- list()
for (i in names(MEs)) {
aa <- length(result_dataframe_list)
MODULE <- i
g <- i
gg <- sub("ME", "", g) #print(gg)
modNames = names(MEs) #substring(names(MEs), 2)
column = match(MODULE, modNames);
moduleGenes <- moduleLabels[moduleLabels == sub("ME","", MODULE)]
nSamples = nrow(omics_data)
# names (colors) of the modules
#Calculate Module Membership
geneModuleMembership = as.data.frame(cor(omics_data, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
MMPvalue$Geneid <-row.names(MMPvalue)
head(MMPvalue)
if (Type != "SSU") {
MMPvalue<- dplyr::left_join(MMPvalue[MMPvalue$Geneid %in% names(moduleGenes),],
ANNO[[paste("GeneAnno_", i, sep="")]])
}
if (Type == "SSU") {
MMPvalue <- MMPvalue[MMPvalue$Geneid %in% names(moduleGenes),]
}
MMPvalue$Module_color <- module_size[module_size$Module == paste(sub("ME","",i)),]$Module_color
head(MMPvalue)
result_dataframe_list[[aa+1]] <- MMPvalue
names(result_dataframe_list)[[aa+1]] <- paste(i, sep="_")
}
# B <- do.call(rbind, result_dataframe_list)
# B$Modules <- sub("\\..*","",rownames(B)); rownames(B) <-NULL
# head(B)
#add additional information on connectivity etc...
connectivity <- cbind(degrees, moduleColors, geneModuleMembership)
for (ii in names(result_dataframe_list)) {
result_dataframe_list[[ii]] <- left_join(result_dataframe_list[[ii]], connectivity %>%
mutate(Geneid = rownames(.)))
}
#Subset for correlation
print(Corr_Filter)
result_dataframe_list_corr <-list()
for (ii in names(result_dataframe_list)) {
aaa <- length(result_dataframe_list_corr)
result_dataframe_list_corr[[aaa+1]] <-
result_dataframe_list[[ii]][abs(result_dataframe_list[[ii]][paste("MMME", sub("ME","", ii), sep="")]) > Corr_Filter,]
names(result_dataframe_list_corr)[[aaa+1]] <- ii
}
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
EGOS <- list()
KEGGS <- list()
for (MODULE in names(result_dataframe_list_corr)) {
tryCatch({
egoUp <- data_frame()
KEGGUp <- data_frame()
a <- length(EGOS)
FILENAME <- paste(Species, Year, Season, Type, Tissue, Analysis, "ORA_Corr_Filter", Corr_Filter, sep="_")
TITLE <- paste("ORA_Corr_Filter", Corr_Filter, Species, Type, Tissue, sep="_" )
dedup_ids<- result_dataframe_list_corr[[MODULE]]
WGCNAORAPlotRKwhite6SLUC(TPM = TPM, vst = vst, Species = Species, TPM_value = 1,
Samples = rownames(SAM),
SAMDF = SAM, TITLE = TITLE, filename= FILENAME, module = sub("ME", "RNA", MODULE),
dedup_ids = dedup_ids)
##################
#Plot a selection#
##################
if (Type == "RNA" & Tissue == "Gill" & MODULE %in% c("ME2", "ME7", "ME11")) {
plot(KEGG_plot)
} else if (Tissue == "Liver" & MODULE %in% c("ME1", "ME3", "ME6", "ME7")) {
plot(KEGG_plot)
} else {
# print("Plots are saved to the pathPlots")
}
EGOS[[a+1]] <- egoUp
names(EGOS)[[a+1]] <- paste("ego", MODULE, sep="_")
KEGGS[[a+1]] <- KEGGUp
names(KEGGS)[[a+1]] <- paste("kegg", MODULE, sep="_")
#Remove Empty list elements
#sapply(EGOS,dim)
EGOS <- EGOS[lengths(EGOS) > 0]
filtered_list <- map(EGOS, ~ filter(.x, Count != 0))
# Remove empty list elements
EGOS <- keep(filtered_list, ~ nrow(.) > 0)
#sapply(EGOS,dim)
#sapply(KEGGS,dim)
KEGGS <- KEGGS[lengths(KEGGS) > 0]
filtered_list <- map(KEGGS, ~ filter(.x, Count != 0))
# Remove empty list elements
KEGGS <- keep(filtered_list, ~ nrow(.) > 0)
#sapply(KEGGS,dim)
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
WGCNA_MODULES_Corr_Filter <-list("EGOS" = EGOS,"KEGGS"=KEGGS)
#print(sapply(WGCNA_MODULES_Corr_Filter$EGOS ,dim))
#print(sapply(WGCNA_MODULES_Corr_Filter$KEGGS ,dim))
ORA_list[[ORA_list_length+1]] <- WGCNA_MODULES_Corr_Filter
names(ORA_list)[[ORA_list_length+1]] <- paste(Tissue, Type, "WGCNA_MODULES_Corr_Filter", Corr_Filter, sep="_")
}
## [1] "RNA_Gill_WGCNA"
## [1] 0.8
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [6] must be the same length as the vector [5]
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [13] must be the same length as the vector [12]
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [17] must be the same length as the vector [16]
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [18] must be the same length as the vector [17]
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [18] must be the same length as the vector [17]
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [18] must be the same length as the vector [16]
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [21] must be the same length as the vector [17]
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [27] must be the same length as the vector [26]
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "tpm"
## [1] 32916 21
## [1] 20551 21
## [1] "RNA_Liver_WGCNA"
## [1] 0.8
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [4] must be the same length as the vector [3]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [6] must be the same length as the vector [5]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [10] must be the same length as the vector [9]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [10] must be the same length as the vector [9]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [10] must be the same length as the vector [9]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [12] must be the same length as the vector [11]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [13] must be the same length as the vector [12]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [14] must be the same length as the vector [13]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [15] must be the same length as the vector [14]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [16] must be the same length as the vector [15]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [17] must be the same length as the vector [16]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [17] must be the same length as the vector [16]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [18] must be the same length as the vector [17]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : ℹ In argument: `..1 = p.adjust`.
## Caused by error:
## ! `..1` must be a vector, not a function.
## ERROR : 'names' attribute [19] must be the same length as the vector [18]
## [1] "tpm"
## [1] 32916 22
## [1] 15590 22
https://www.biostars.org/p/294268/ https://support.bioconductor.org/p/p134359/ https://www.informatics.jax.org/vocab/gene_ontology/GO:0046130 http://geneontology.org/docs/faq/#slims
Mapping granular annotations of a set of genes to one or more high-level, broader parent terms is referred to as “GO slimming”. GO slimming is commonly used to report an overview of a genome or to a set of summarize experimental results. GO hosts a number of predefined slim sets, a generic GO slim, and a number of slims tailored to give good coverage for some well studied/annotated model species. GO slimming will only be successful if the organism of interest has a body of GO annotation in the GO database (either electronic or manual).
What are the file formats used by the Gene Ontology? Ontology files are available in OBO, OWL, and some files are available in JSON. Refer to the ontology downloads page for information on ontology files.
Annotations are available in GAF (Gene Association File), or by the companion files Gene Product Association Data (GPAD) + Gene Product Information (GPI). For general introduction to the Gene Ontology’s annotation file formats, see GAF 2.2 and GPAD file format/GPI file pages.
https://support.bioconductor.org/p/128407/
#BiocManager::install("GSEABase")
#BiocManager::install("GOfuncR")
#install.packages("ontologyIndex")
#Download GO SLIM#
#http://geneontology.org/docs/download-ontology/
#Move obo dataset into GSEAbase package library
#mv /Users/admin/Downloads/goslim_agr.obo /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/GSEABase/extdata
#mv /Users/admin/Downloads/goslim_mouse.obo /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/GSEABase/extdata
for (WGCNAset in names(ORA_list)) {
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
g <- WGCNAset
########
#SLIMGO#
########
Tissue_EGO_List <- ORA_list[[WGCNAset]]$EGOS
Tissue_EGO_List <- Tissue_EGO_List[sapply(Tissue_EGO_List, function(x) length(x) > 0)]
SLIMGO_mouse <-list()
for (ii in names(Tissue_EGO_List)) {
require(GSEABase)
a <- length(SLIMGO_mouse)
# Vector of GO IDs
go_ids <- Tissue_EGO_List[[ii]]$ID
# Create GSEAbase GOCollection using `go_ids`
myCollection <- GOCollection(go_ids)
# Retrieve GOslims from GO OBO file set
#slim <- getOBOCollection(gseabase_files)
#Download GO SLIM#
#http://geneontology.org/docs/download-ontology/
#Move obo dataset into GSEAbase package library
#mv /Users/admin/Downloads/goslim_agr.obo /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/GSEABase/extdata
#mv /Users/admin/Downloads/goslim_mouse.obo /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/GSEABase/extdata
fl <- system.file("extdata", "goslim_mouse.obo", package="GSEABase")
slim <- getOBOCollection(fl)
slimdf <- goSlim(myCollection, slim, "BP", verbose)
# List of GOslims and all GO IDs from `go_ids`
gomap <- as.list(GOBPOFFSPRING[rownames(slimdf)])
# Maps `go_ids` to matching GOslims
mapped <- lapply(gomap, intersect, ids(myCollection))
# Append all mapped GO IDs to `slimdf`
# `sapply` needed to apply paste() to create semi-colon delimited values
slimdf$ids <- sapply(lapply(gomap, intersect, ids(myCollection)), paste, collapse=";")
# Remove "character(0) string from "ids" column
slimdf$ids[slimdf$ids == "character(0)"] <- ""
#https://support.bioconductor.org/p/128407/ credits to SamWhite
# Add self-matching GOIDs to "ids" column, if not present
for (go_id in go_ids) {
# Check if the go_id is present in the row names
if (go_id %in% rownames(slimdf)) {
# Check if the go_id is not present in the ids column
# Also removes white space "trimws()" and converts all to upper case to handle
# any weird, "invisible" formatting issues.
if (!go_id %in% trimws(toupper(strsplit(slimdf[go_id, "ids"], ";")[[1]]))) {
# Append the go_id to the ids column with a semi-colon separator
if (length(slimdf$ids) > 0 && nchar(slimdf$ids[nrow(slimdf)]) > 0) {
slimdf[go_id, "ids"] <- paste0(slimdf[go_id, "ids"], "; ", go_id)
} else {
slimdf[go_id, "ids"] <- go_id
}}}}
rm(go_id)
SLIMGO_mouse[[a+1]] <- slimdf
names(SLIMGO_mouse)[[a+1]] <- paste("slim", ii, sep="_")
rm(slimdf)
rm(a)
}
SLIMGO_generic<-list()
for (iii in names(Tissue_EGO_List)) {
require(GSEABase)
a <- length(SLIMGO_generic)
# Vector of GO IDs
go_ids <- Tissue_EGO_List[[iii]]$ID
# Create GSEAbase GOCollection using `go_ids`
myCollection <- GOCollection(go_ids)
# Retrieve GOslims from GO OBO file set
#slim <- getOBOCollection(gseabase_files)
#Download GO SLIM#
#http://geneontology.org/docs/download-ontology/
#Move obo dataset into GSEAbase package library
#mv /Users/admin/Downloads/goslim_agr.obo /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/GSEABase/extdata
#mv /Users/admin/Downloads/goslim_mouse.obo /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/GSEABase/extdata
fl <- system.file("extdata", "goslim_generic.obo", package="GSEABase")
slim <- getOBOCollection(fl)
slimdf <- goSlim(myCollection, slim, "BP", verbose)
# List of GOslims and all GO IDs from `go_ids`
gomap <- as.list(GOBPOFFSPRING[rownames(slimdf)])
# Maps `go_ids` to matching GOslims
mapped <- lapply(gomap, intersect, ids(myCollection))
# Append all mapped GO IDs to `slimdf`
# `sapply` needed to apply paste() to create semi-colon delimited values
slimdf$ids <- sapply(lapply(gomap, intersect, ids(myCollection)), paste, collapse=";")
# Remove "character(0) string from "ids" column
slimdf$ids[slimdf$ids == "character(0)"] <- ""
#https://support.bioconductor.org/p/128407/ credits to SamWhite
# Add self-matching GOIDs to "ids" column, if not present
for (go_id in go_ids) {
# Check if the go_id is present in the row names
if (go_id %in% rownames(slimdf)) {
# Check if the go_id is not present in the ids column
# Also removes white space "trimws()" and converts all to upper case to handle
# any weird, "invisible" formatting issues.
if (!go_id %in% trimws(toupper(strsplit(slimdf[go_id, "ids"], ";")[[1]]))) {
# Append the go_id to the ids column with a semi-colon separator
if (length(slimdf$ids) > 0 && nchar(slimdf$ids[nrow(slimdf)]) > 0) {
slimdf[go_id, "ids"] <- paste0(slimdf[go_id, "ids"], "; ", go_id)
} else {
slimdf[go_id, "ids"] <- go_id
rm(go_id)
}}}}
SLIMGO_generic[[a+1]] <- slimdf
names(SLIMGO_generic)[[a+1]] <- paste("slim", iii, sep="_")
rm(slimdf)
rm(a)
}
# SLIMGO_pir<-list()
# for (iiii in names(Tissue_EGO_List )) {
# require(GSEABase)
# a <- length(SLIMGO_pir)
# # Vector of GO IDs
# go_ids <- Tissue_EGO_List[[iiii]]$ID
# # Create GSEAbase GOCollection using `go_ids`
# myCollection <- GOCollection(go_ids)
# # Retrieve GOslims from GO OBO file set
# #slim <- getOBOCollection(gseabase_files)
# #Download GO SLIM#
# #http://geneontology.org/docs/download-ontology/
# #Move obo dataset into GSEAbase package library
# #mv /Users/admin/Downloads/goslim_agr.obo /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/GSEABase/extdata
# #mv /Users/admin/Downloads/goslim_mouse.obo /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/GSEABase/extdata
# fl <- system.file("extdata", "goslim_pir.obo", package="GSEABase")
# slim <- getOBOCollection(fl)
# slimdf <- goSlim(myCollection, slim, "BP", verbose)
# # List of GOslims and all GO IDs from `go_ids`
# gomap <- as.list(GOBPOFFSPRING[rownames(slimdf)])
# # Maps `go_ids` to matching GOslims
# mapped <- lapply(gomap, intersect, ids(myCollection))
# # Append all mapped GO IDs to `slimdf`
# # `sapply` needed to apply paste() to create semi-colon delimited values
# slimdf$ids <- sapply(lapply(gomap, intersect, ids(myCollection)), paste, collapse=";")
# # Remove "character(0) string from "ids" column
# slimdf$ids[slimdf$ids == "character(0)"] <- ""
# #https://support.bioconductor.org/p/128407/ credits to SamWhite
# # Add self-matching GOIDs to "ids" column, if not present
# for (go_id in go_ids) {
# # Check if the go_id is present in the row names
# if (go_id %in% rownames(slimdf)) {
# # Check if the go_id is not present in the ids column
# # Also removes white space "trimws()" and converts all to upper case to handle
# # any weird, "invisible" formatting issues.
# if (!go_id %in% trimws(toupper(strsplit(slimdf[go_id, "ids"], ";")[[1]]))) {
# # Append the go_id to the ids column with a semi-colon separator
# if (length(slimdf$ids) > 0 && nchar(slimdf$ids[nrow(slimdf)]) > 0) {
# slimdf[go_id, "ids"] <- paste0(slimdf[go_id, "ids"], "; ", go_id)
# } else {
# slimdf[go_id, "ids"] <- go_id
# }}}}
# rm(go_id)
# SLIMGO_pir[[a+1]] <- slimdf
# names(SLIMGO_pir)[[a+1]] <- paste("slim", iiii, sep="_")
# rm(slimdf)
# rm(a)
#}
#Clean Data#
# Remove empty list elements
SLIMGO_mouse <- keep(map(SLIMGO_mouse, ~ filter(.x, Count != 0)), ~ nrow(.) > 0)
#sapply(SLIMGO_mouse,dim)
SLIMGO_generic <- keep(map(SLIMGO_generic, ~ filter(.x, Count != 0)), ~ nrow(.) > 0)
#sapply(SLIMGO_generic,dim)
#SLIMGO_pir <- keep(map(SLIMGO_pir, ~ filter(.x, Count != 0)), ~ nrow(.) > 0)
#sapply(SLIMGO_pir,dim)
##########
#SLIMKEGG#
##########
KeggPathwayMapsGenes <- readRDS(file = paste0(file.path(path_Input_RNA,
"KeggPathwayMapsGenes08.05.2023"), ".rds"))
KeggPathwayMapsGenes$ID <- KeggPathwayMapsGenes$PathwayID
Tissue_KEGG_List <- ORA_list[[g]]$KEGGS
Tissue_KEGG_List <- Tissue_KEGG_List[sapply(Tissue_KEGG_List, function(x) length(x) > 0)]
SLIMKEGG<-list()
for (x in names(Tissue_KEGG_List)) {
aa <- length(SLIMKEGG)
A <- as.data.frame(Tissue_KEGG_List[[x]])
A <- dplyr::left_join(A, KeggPathwayMapsGenes[KeggPathwayMapsGenes$ID %in% A$ID,])
SLIMKEGG[[aa+1]] <- A
names(SLIMKEGG)[[aa+1]] <- paste("slim", x, sep="_")
}
#sapply(SLIMKEGG ,dim)
filtered_list <- map(SLIMKEGG, ~ filter(.x, Count != 0))
# Remove empty list elements
SLIMKEGG <- keep(filtered_list, ~ nrow(.) > 0)
#sapply(SLIMKEGG,dim)
ORA_list[[WGCNAset]]$SLIMGO_mouse <- SLIMGO_mouse
ORA_list[[WGCNAset]]$SLIMGO_generic <- SLIMGO_generic
#ORA_list[[WGCNAset]]$SLIMGO_pir <- SLIMGO_pir
ORA_list[[WGCNAset]]$SLIMKEGG <- SLIMKEGG
}
###########
#Save Data#
###########
###########
#Save Data#
###########
saveRDS(ORA_list$Gill_RNA_WGCNA_MODULES_Corr_Filter_0.8, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ORA_list_Gill", Corr_Filter, Date, sep="_"), ".rds", sep=""))))
saveRDS(ORA_list$Liver_RNA_WGCNA_MODULES_Corr_Filter_0.8, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ORA_list_Liver", Corr_Filter, Date, sep="_"), ".rds", sep=""))))
ORA_list <- list()
ORA_list$Gill_RNA_WGCNA_MODULES_Corr_Filter_0.8 <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ORA_list_Gill", Corr_Filter, Date, sep="_"), ".rds", sep=""))))
ORA_list$Liver_RNA_WGCNA_MODULES_Corr_Filter_0.8 <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ORA_list_Liver", Corr_Filter, Date, sep="_"), ".rds", sep=""))))
for (WGCNAset in names(ORA_list)) {
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
Analysis <- sub(paste0(".*", "(WGCNA)", ".*"), "\\1", WGCNAset)
Corr_Filter <- sub(paste0(".*", "(Corr_Filter....)", ".*"), "\\1", WGCNAset)
Corr_Filter <- sub("Corr_Filter_", "", Corr_Filter)
GeneAnno <- WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]][grepl("GeneAnno",
names(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]))][[1]]
WGCNAset_list <- ORA_list[[WGCNAset]]
for (i in names(WGCNAset_list)[grepl("SLIMKEGG", names(WGCNAset_list))]) {
require(org.Hs.eg.db)
SLIMKEGG <- WGCNAset_list[[i]]
#Just to add in readbale geneSymbols for the EntrezID
for (ii in names(SLIMKEGG)) {
MODULE <- sub("slim_kegg_", "", ii)
ANNO <- GeneAnno[[which(sub("GeneAnno_", "", names(GeneAnno)) %in% MODULE)]]
df_Keggs <- as.data.frame(SLIMKEGG[[ii]])
geneID_split <- str_split(df_Keggs$geneID, "/")
GeneSymbols <- character(length(geneID_split))
for (x in 1:length(geneID_split)) {
HS <- AnnotationDbi::select(org.Hs.eg.db, as.character(geneID_split[[x]]), "SYMBOL", "ENTREZID")
GeneSymbols[x] <- paste(HS$SYMBOL, collapse = "/")}
GeneSymbolsSL <- character(length(geneID_split))
for (x in 1:length(geneID_split)) {
trial <- left_join(data.frame(ENTREZID = geneID_split[[x]]), ANNO)
names(trial)[names(trial) == "Geneid"] <- "SLgeneID"
GeneSymbolsSL[x] <- paste(trial$SLgeneID, collapse = "/")
}
GeneSymbolsSLunmapped <- character(length(geneID_split))
for (x in 1:length(geneID_split)) {
GeneSymbolsSLunmapped[x] <- paste(ANNO[is.na(ANNO$ENTREZID), ]$GeneSymbolHS, collapse = "/")
}
df_Keggs$GeneSymbolHS <- GeneSymbols
df_Keggs$GeneSymbolSL <- GeneSymbolsSL
df_Keggs$UnMapped <- GeneSymbolsSLunmapped
#Save as CSV
write.csv2(df_Keggs, file = paste0(file.path(path_Output_WGCNA,paste(paste(Species, Year, Season, Analysis, Tissue, "Corr_filter", Corr_Filter, sub("ME", "RNA",ii), sep="_"), "csv", sep="."))))
}
}
}
WGCNA_KEGG_SLIMS <- list(
"Gill_Slim" =ORA_list$Gill_RNA_WGCNA_MODULES_Corr_Filter_0.8$SLIMKEGG,
"Liver_Slim" = ORA_list$Liver_RNA_WGCNA_MODULES_Corr_Filter_0.8$SLIMKEGG)
#######################
#Gill & Liver Combined#
#######################
Slims <- list()
for (i in names(WGCNA_KEGG_SLIMS)[grepl("Gill|Liver", names(WGCNA_KEGG_SLIMS))]) {
#https://github.com/kevinblighe/E-MTAB-6141
tryCatch({
a <- length(Slims )
g <- paste(i)
gg <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i)
A <- WGCNA_KEGG_SLIMS[[i]]
#Add the KeggPathwayMaps
KeggPathwayMapsGenes$ID <- KeggPathwayMapsGenes$PathwayID
for (ii in names(A)) {
A[[ii]] <- dplyr::left_join(A[[ii]], KeggPathwayMapsGenes[KeggPathwayMapsGenes$ID %in% A[[ii]]$ID,])}
B <- do.call(rbind.data.frame, A)
#Including or Excluding Unknown Pathways in the Slim
#These were not included in the Kegg-Slim so I dont know how many genes would relate to them, this is just a dircrepancy in timing between last Kegg database reading and Slim-download
B[is.na(B$Parent),]
B <- B[B$Description != "Efferocytosis",]
B <- B[B$Description != "ATP-dependent chromatin remodeling",]
B <- B[B$Description != "Polycomb repressive complex",]
B[is.na(B$Parent),]
# UnknownParents <- c("ATP-dependent chromatin remodeling", "Polycomb repressive complex")
# for (x in UnknownParents ) {
# B[B$Description == x,]$Parent <-
# B[B$Description == x,]$category
#
# B[B$Description == x,]$Child <-
# B[B$Description == x,]$subcategory
# }
# B[is.na(B$Parent),]
B$Modules <- sub("\\..*","",rownames(B))
B$Modules <- sub("slim_kegg_","",B$Modules)
rownames(B) <- NULL
trial <- as.data.frame(B %>%
dplyr::group_by(Modules, Child) %>%
dplyr::summarise(GeneCount = length(unique(as.vector(unique(unlist(str_split(geneID, "/"))))))))
trial2 <- as.data.frame(B %>%
dplyr::group_by(Child) %>%
dplyr::summarise(ChildGeneCount = length(unique(as.vector(unique(unlist(str_split(GeneID, "/"))))))))
trial3 <- dplyr::left_join(trial, trial2)
trial3$ModuleRatio <- trial3$GeneCount / trial3$ChildGeneCount * 100
B <- dplyr::left_join(B, trial3)
B$PathwayActivation <- -log2(B$p.adjust)
#BB <- BB[c("Modules", "Description", "GeneRatio", "p.adjust", "Child", "Parent", "ModuleRatio")]
B$Modules <- paste("RNA", sub("ME", "", B$Modules), sep="")
B$Tissue <- paste(gg)
B$ModulesTissue <- paste(gg, B$Modules, sep="_")
#Select only Enrichment that are correlated (0.3) to the abipotics and physiology and exclude module 0
InterestingComparison <- ModsOfInterst_list[[grep(paste(gg, "RNA", sep="_"), names(ModsOfInterst_list))]]
InterestingComparison <- lapply(InterestingComparison, rownames_to_column)
InterestingComparison <- do.call(rbind, lapply(names(InterestingComparison), function(name) {
data.frame(name, InterestingComparison[[name]], stringsAsFactors = FALSE)}))
names(InterestingComparison) <- c("variable", "module", "correlation", "p_value")
InterestingComparison$module <- paste(sub("ME","RNA", InterestingComparison$module))
InterestingComparison <- InterestingComparison[InterestingComparison$module != "RNA0", ]
BB <- B
BB <- BB[BB$Modules %in% unique(InterestingComparison$module),]
b <- length(Slims)
Slims[[b+1]] <- BB
names(Slims)[[b+1]] <- gg
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
Slim <- do.call(rbind.data.frame, Slims)
rownames(Slim) <- c()
Order <- c("Gill", "Liver")
mg <- ggplot(Slim, aes(x = Modules, y = Child, color=PathwayActivation, size = ModuleRatio)) + geom_point() + scale_size(range = c(1, 15))
#scale_color_manual(colours = c("blue", "red"))
#scale_colour_manual(values=c("blue", "red"))
mg <- mg + facet_grid(Parent ~ factor(Tissue, levels=Order), scales = "free", space = "free") +
scale_color_gradientn(colours = c("blue", "pink", "red", "red"),
values = scales::rescale(x = c(0, 1, 5, 15), to = c(0,1), from = c(0, 15) )) +
guides(color = guide_legend(title = "-log2(p_adjust)"), size = guide_legend(title = "-log2(p_adjust)")) +
#theme_minimal() +
atheme +
theme(strip.text.y = element_text(angle = 0)) +
theme(#axis.text.x=element_blank(),
#axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y.left = element_blank(),
axis.ticks.y =element_blank()
#axis.title.x = element_blank()
) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#panel.grid.major = element_blank(), #remove major gridlines
#panel.grid.minor = element_blank(), #remove minor gridlines
legend.background = element_rect(fill='transparent'), #transparent legend bg
legend.box.background = element_rect(fill='transparent'), #transparent legend panel
panel.grid.major = element_line(colour = "grey40"),
panel.grid.minor = element_line(colour = "grey40")
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold"))
g <- ggplot_gtable(ggplot_build(mg))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(c("salmon", "darkred"), 0.4)
k <- 1
for (x in stripr) {
j <- which(grepl('rect', g$grobs[[x]]$grobs[[1]]$childrenOrder))
g$grobs[[x]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
AA<- print(cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.1, 0.99)))
ggsave(AA, filename = paste(paste(Species, Year, Season, Analysis, "Slim_Tissues_KEGGPathway", sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12,height = 9)
######
#KEGG#
######
KeggPathwayMapsGenes <- readRDS(file = paste0(file.path(path_Input_RNA,
"KeggPathwayMapsGenes08.05.2023"), ".rds"))
KeggPathwayMapsGenes$ID <- KeggPathwayMapsGenes$PathwayID
KeggPathwayMapsGenes$PathwayGeneID <- KeggPathwayMapsGenes$GeneID
KeggPathwayMapsGenes$PathwaySymbol <- KeggPathwayMapsGenes$Symbol
B <- do.call(rbind.data.frame, ORA_list$Gill_RNA_WGCNA_MODULES_Corr_Filter_0.8$SLIMKEGG)
unique(B$Parent)
## [1] "Genetic Information Processing"
## [2] "Cellular Processes"
## [3] "Human Diseases"
## [4] "Organismal Systems"
## [5] "Environmental Information Processing"
## [6] "Metabolic pathways"
## [7] "Nucleotide metabolism"
## [8] "Carbon metabolism"
## [9] "2-Oxocarboxylic acid metabolism"
## [10] "Biosynthesis of amino acids"
## [11] "Biosynthesis of nucleotide sugars"
## [12] NA
B$Modules <- gsub("\\..*","",rownames(B))
B$Modules <- gsub("slim_kegg_","",B$Modules)
B <- B[B$Modules != "ME0",]
rownames(B) <- NULL
trial <- as.data.frame(B %>%
dplyr::group_by(Modules, Child) %>% dplyr::summarise(GeneCount = length(unique(as.vector(unique(unlist(str_split(geneID, "/"))))))))
trial2 <- as.data.frame(B %>%
dplyr::group_by(Child) %>%
dplyr::summarise(ChildGeneCount = length(unique(as.vector(unique(unlist(str_split(GeneID, "/"))))))))
trial3 <- dplyr::left_join(trial, trial2)
trial3$ModuleRatio <- trial3$GeneCount / trial3$ChildGeneCount * 100
B <- dplyr::left_join(B, trial3)
B$PathwayActivation <- -log2(B$p.adjust)
BB <- B
BB <- BB[c("Modules", "Description", "GeneRatio", "p.adjust", "Child", "Parent", "ModuleRatio", "PathwayActivation")]
BB$Modules <- paste("RNA", sub("ME", "", BB$Modules), sep="")
BB <- BB[BB$Modules != "RNA0",]
#Select only Enrichment that are correlated (0.3) to the abipotics and physiology and exclude module 0
InterestingComparison <- ModsOfInterst_list[[grep(paste(gg, "RNA", sep="_"), names(ModsOfInterst_list))]]
InterestingComparison <- lapply(InterestingComparison, rownames_to_column)
InterestingComparison <- do.call(rbind, lapply(names(InterestingComparison), function(name) {
data.frame(name, InterestingComparison[[name]], stringsAsFactors = FALSE)}))
names(InterestingComparison) <- c("variable", "module", "correlation", "p_value")
InterestingComparison$module <- paste(sub("ME","RNA", InterestingComparison$module))
InterestingComparison <- InterestingComparison[InterestingComparison$module != "RNA0", ]
BB$CorrelationToTraits <-ifelse(BB$Modules %in% InterestingComparison$name, "Cor > 0.3", "Cor < 0.3")
Order <- c("Cor > 0.3", "Cor < 0.3")
mg <- ggplot(BB, aes(x = Modules, y = Child, color=PathwayActivation, size = ModuleRatio)) + geom_point() + scale_size(range = c(3, 20), limits = c(0, 100)) +
scale_color_gradientn(colours = c("blue", "pink", "red", "red"),
values = scales::rescale(x = c(0, 0.1, 1, 200), to = c(0,1), from = c(0, 200) ))
mg <- mg + facet_grid(Parent ~ factor(CorrelationToTraits, levels=Order), scales = "free", space = "free") +
guides(color = guide_legend(title = "-log2(p_adjust)"), size = guide_legend(title = "Ratio\n[%Genes per Level]"))+
xlab("\n\nAhead AADT") +
#theme_minimal() +
atheme +
theme(strip.text.y = element_text(angle = 0)) +
theme(axis.text.x=element_blank(),
#axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y.left = element_blank(),
axis.ticks.y =element_blank()
#axis.title.x = element_blank()
) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#panel.grid.major = element_blank(), #remove major gridlines
#panel.grid.minor = element_blank(), #remove minor gridlines
legend.background = element_rect(fill='transparent'), #transparent legend bg
legend.box.background = element_rect(fill='transparent') #transparent legend panel
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text.y.right = element_text(size=12, color = "black", face= "bold"),
axis.text.y.left = element_text(size=12, face = "bold", color= OutlineColor),
axis.text.x.bottom = element_text(size=10,face = "bold", angle = 45, hjust = 1, color= OutlineColor),
legend.text=element_text(size=12),
legend.title=element_text(size=12))
#, legend.position="none")
g <- ggplot_gtable(ggplot_build(mg))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(c("red", "blue"), 0.4)
k <- 1
for (i in stripr) {
j <- which(grepl('rect', g$grobs[[i]]$grobs[[1]]$childrenOrder))
g$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
AA<- print(cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.01, 0.999)))
ggsave(AA, filename = paste(paste(Species, Year, Season, Analysis, "Slim_KEGGPathway", Tissue, sep="_"), ".png", sep=""), path = pathPlots,
device='png', dpi=300, width = 13,height = 12)
######
#Gill#
######
OxyModules <- c(
2,
4,
7,
10,
11,
21,
25,
28,
34,
38)
OxyModules <- paste("RNA", OxyModules, sep="")
mg <- ggplot(BB[BB$Modules %in% OxyModules,], aes(x = Modules, y = Child, color=PathwayActivation, size = PathwayActivation)) + geom_point() + scale_size(range = c(3, 15)) +
#scale_color_gradientn(colours = c("blue", "pink", "red", "red"),
# values = scales::rescale(x = c(0, 1, 5, 15), to = c(0,1), from = c(0, 15) ))
scale_color_gradientn(colours = c("blue", "pink", "red", "red"),
values = scales::rescale(x = c(0, 0.1, 1, 200), to = c(0,1), from = c(0, 200) ))
mg <- mg + facet_grid(Parent ~ factor(CorrelationToTraits, levels=Order), scales = "free", space = "free") +
xlab("\n\nAhead AADT") +
#theme_minimal() +
atheme +
theme(strip.text.y = element_text(angle = 0)) +
theme(axis.text.x=element_blank(),
#axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y.left = element_blank(),
axis.ticks.y =element_blank()
#axis.title.x = element_blank()
) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#panel.grid.major = element_blank(), #remove major gridlines
#panel.grid.minor = element_blank(), #remove minor gridlines
legend.background = element_rect(fill='transparent'), #transparent legend bg
legend.box.background = element_rect(fill='transparent') #transparent legend panel
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text.y.right = element_text(size=12, color = "black", face= "bold"),
axis.text.y.left = element_text(size=12, face = "bold", color= OutlineColor),
axis.text.x.bottom = element_text(size=10,face = "bold", angle = 45, hjust = 1, color= OutlineColor),
legend.position="none")
g <- ggplot_gtable(ggplot_build(mg))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(c("red", "blue"), 0.4)
k <- 1
for (i in stripr) {
j <- which(grepl('rect', g$grobs[[i]]$grobs[[1]]$childrenOrder))
g$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
AA<- print(cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.01, 0.999)))
ggsave(AA, filename = paste(paste(Species, Year, Season, Analysis, "Slim_Tissues_KEGGPathway_Oxygen", sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 8,height = 9)
######
#Gill#
######
############################
#For Subsets of SSU_Modules#
############################
#ModulesOfInterst <-c("SSU2","SSU3","SSU5","SSU6", "SSU9", "SSU10")
#BB <- B[B$Modules %in% ModulesOfInterst,]
#BB <- BB[c("Modules", "Description", "GeneRatio", "p.adjust", "Child", "Parent", "ModuleRatio")]
BB <- B
BB$Modules <- paste("RNA", sub("ME", "", BB$Modules), sep="")
table(BB[BB$Parent == "Organismal Systems",]$Child)
##
## Aging Circulatory system
## 1 4
## Development and regeneration Digestive system
## 3 3
## Endocrine system Environmental adaptation
## 9 2
## Excretory system Immune system
## 2 17
## Nervousry system Sensorysystem
## 4 2
BBImmune <- BB[BB$Child %in% c(
"Immune system"
), ]
mg <- ggplot(BBImmune, aes(x = Modules, y = Description, color=PathwayActivation , size = PathwayActivation )) +
geom_point() + scale_size(range = c(3, 20)) +
scale_color_gradientn(colours = c("blue", "pink", "red", "red"),
values = scales::rescale(x = c(0, 1, 5, 50), to = c(0,1), from = c(0, 50) ))
mg <- mg + facet_grid(rows = vars(BBImmune$Child), , scales = "free", space = "free") +
#mg <- mg + facet_grid(Child ~ factor(CorrelationToTraits, levels=Order), scales = "free", space = "free") +
guides(color = guide_legend(title = "-log2(p_adjust)"), size = guide_legend(title = "-log2(p_adjust)")) +
xlab("\n\nAhead AADT") +
#theme_minimal() +
atheme +
theme(strip.text.y = element_text(angle = 0)) +
theme(#axis.text.x=element_blank(),
#axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y.left = element_blank(),
axis.ticks.y =element_blank()
#axis.title.x = element_blank()
) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#panel.grid.major = element_blank(), #remove major gridlines
#panel.grid.minor = element_blank(), #remove minor gridlines
legend.background = element_rect(fill='transparent'), #transparent legend bg
legend.box.background = element_rect(fill='transparent'), #transparent legend panel
panel.grid.major = element_line(colour = "grey40"),
panel.grid.minor = element_line(colour = "grey40")
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold"))
g <- ggplot_gtable(ggplot_build(mg))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(c("salmon", "darkred"), 0.4)
k <- 1
for (x in stripr) {
j <- which(grepl('rect', g$grobs[[x]]$grobs[[1]]$childrenOrder))
g$grobs[[x]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
AA<- print(cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.1, 0.99)))
ggsave(AA, filename = paste(paste(Species, Year, Season, Analysis, "Slim_Tissues_KEGGPathway_ImmuneSystem", sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 8,height = 9)
ModsOfInterst_list <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ModsOfInterst_list", Date, sep="_"), ".rds", sep=""))))
####
#GO#
####
WGCNA_KEGG_SLIMS_GO <- list(
"Gill_Slim" =ORA_list$Gill_RNA_WGCNA_MODULES_Corr_Filter_0.8$SLIMGO_mouse,
"Liver_Slim" = ORA_list$Liver_RNA_WGCNA_MODULES_Corr_Filter_0.8$SLIMGO_mouse)
#######################
#Gill & Liver Combined#
#######################
Slims_GO <- list()
for (i in names(WGCNA_KEGG_SLIMS_GO)[grepl("Gill|Liver", names(WGCNA_KEGG_SLIMS_GO))]) {
tryCatch({
a <- length(Slims_GO)
g <- paste(i)
gg <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i)
A <- WGCNA_KEGG_SLIMS_GO[[i]]
B <- do.call(rbind.data.frame, A)
B$Modules <- gsub("\\..*","",rownames(B))
unique(B$Modules)
B$Modules <- gsub("slim_ego_","",B$Modules)
rownames(B) <- NULL
B$Modules <- paste("RNA", sub("ME", "", B$Modules), sep="")
B$Tissue <- paste(gg)
B$ModulesTissue <- paste(gg, B$Modules, sep="_")
#Select only Enrichment that are correlated (0.3) to the abipotics and physiology and exclude module 0
InterestingComparison <- ModsOfInterst_list[[grep(paste(gg, "RNA", sep="_"), names(ModsOfInterst_list))]]
InterestingComparison <- lapply(InterestingComparison, rownames_to_column)
InterestingComparison <- do.call(rbind, lapply(names(InterestingComparison), function(name) {
data.frame(name, InterestingComparison[[name]], stringsAsFactors = FALSE)}))
names(InterestingComparison) <- c("variable", "module", "correlation", "p_value")
InterestingComparison$module <- paste(sub("ME","RNA", InterestingComparison$module))
InterestingComparison <- InterestingComparison[InterestingComparison$module != "RNA0", ]
BB <- B
BB <- BB[BB$Modules %in% unique(InterestingComparison$module),]
b <- length(Slims_GO)
Slims_GO[[b+1]] <- BB
names(Slims_GO)[[b+1]] <- gg
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
Slim_GO <- do.call(rbind.data.frame, Slims_GO)
rownames(Slim_GO) <- c()
Order <- c("Gill", "Liver")
mg <- ggplot(Slim_GO, aes(x = Modules, y = Term, color=Percent, size = Percent)) + geom_point() + scale_size(range = c(3, 20))
#scale_color_manual(colours = c("blue", "red"))
#scale_colour_manual(values=c("blue", "red"))
mg <- mg + facet_grid(Term ~ factor(Tissue, levels=Order), scales = "free", space = "free") +
scale_color_gradientn(colours = c("blue", "pink", "red", "red"),
values = scales::rescale(x = c(0, 1, 5, 15), to = c(0,1), from = c(0, 15) )) +
guides(color = guide_legend(title = "-log2(p_adjust)"), size = guide_legend(title = "-log2(p_adjust)")) +
#theme_minimal() +
atheme +
theme(strip.text.y = element_text(angle = 0)) +
theme(#axis.text.x=element_blank(),
#axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y.left = element_blank(),
axis.ticks.y =element_blank()
#axis.title.x = element_blank()
) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#panel.grid.major = element_blank(), #remove major gridlines
#panel.grid.minor = element_blank(), #remove minor gridlines
legend.background = element_rect(fill='transparent'), #transparent legend bg
legend.box.background = element_rect(fill='transparent'), #transparent legend panel
panel.grid.major = element_line(colour = "grey40"),
panel.grid.minor = element_line(colour = "grey40")
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold"))
g <- ggplot_gtable(ggplot_build(mg))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(c("salmon", "darkred"), 0.4)
k <- 1
for (x in stripr) {
j <- which(grepl('rect', g$grobs[[x]]$grobs[[1]]$childrenOrder))
g$grobs[[x]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
AA<- print(cowplot::plot_grid(prow, ncol = 1, rel_heights = c(0.1, 0.99)))
ggsave(AA, filename = paste(paste(Species, Year, Season, Analysis, "Slim_Tissues_GO_Pathway", sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12,height = 9)
AA
#-
https://www.biostars.org/p/294436/ https://www.biostars.org/p/419867/
https://support.bioconductor.org/p/95965/
in WGCNA hub genes are selected either by calculating the intramodular connectivity from the adjacency (as opposed to TOM), or by ranking genes using their correlation with the module eigengene (kME). Either one should produce results similar to but likely somewhat different from ranking of connectivity calculated from TOM, possibly on a subset of genes.
st_Gill_RNA <- 6
st_Liver_RNA <- 3
st_Gill_SSU <- 6
WGCNA_list <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, Date, sep="_"), ".rds", sep=""))))
degrees_list <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "degrees_list", Date, sep="_"), ".rds", sep=""))))
#TPM Data
tpms <- list("tpmGill "= tpm_RNA_Gill, "tpmLiver" = tpm_RNA_Liver)
Hub_list <- list()
TPM_value <- 1
for (WGCNAset in names(WGCNA_list)[grepl("RNA|SSU", names(WGCNA_list))]) {
tryCatch({
require(tidyverse); require(WGCNA)
print(WGCNAset)
Hub_list_length <- length(Hub_list)
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
omics_data <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("omics_data", names(WGCNA_list[[WGCNAset]]))][[1]])
MEs <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("MEs", names(WGCNA_list[[WGCNAset]]))][[1]])
SAM <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("datTraits", names(WGCNA_list[[WGCNAset]]))][[1]])
TPM <- tpms[[names(tpms)[grepl(Tissue, names(tpms))]]]
TPM <- TPM[names(TPM) %in% rownames(SAM)]
if (Type != "SSU") {
ANNO <- WGCNA_list[[WGCNAset]][grepl("GeneAnno", names(WGCNA_list[[WGCNAset]]))][[1]]
}
if (Tissue == "Liver" && Type == "RNA") {st <- st_Liver_RNA
} else if (Tissue == "Gill" && Type == "RNA") {st <- st_Gill_RNA
} else if (Tissue == "Gill" && Type == "SSU") {st <- st_Gill_SSU
} else {st <- NA }
print(st)
moduleColors <- WGCNA_list[[WGCNAset]][grepl("moduleColors", names(WGCNA_list[[WGCNAset]]))][[1]]
moduleLabels <- WGCNA_list[[WGCNAset]][grepl("moduleLabels", names(WGCNA_list[[WGCNAset]]))][[1]]
module_size<- as.data.frame(table(moduleLabels))
colnames(module_size) <- c("Module", "Size")
module_size %>% dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
degrees <- degrees_list[[paste("degrees", Tissue, Type, sep="_")]]
#################
#GetHubsFunction#
#################
gethubs<- function(omics_data, colorh, omitColors = "grey", power = st, type = "signed") {
isIndex = FALSE
modules = names(table(colorh))
if (!(is.na(omitColors)[1]))
modules = modules[!is.element(modules, omitColors)]
if (is.null(colnames(omics_data))) {
colnames(omics_data) = 1:dim(omics_data)[2]
isIndex = TRUE}
hubs = rep(NA, length(modules))
names(hubs) = modules
for (m in modules) {
adj = adjacency(omics_data[, colorh == m], power = power,
type = type)
hubs[m] = list(as.data.frame(rowSums(adj)))}
return(hubs)}
##################################################
#Get Hubs and join in Module and Gene information#
##################################################
hubs <- gethubs(omics_data, moduleColors, omitColors = 0)
hubs <- lapply(hubs, function(x) rownames_to_column(x, var = "geneid"))
hubs <- lapply(hubs, setNames, c("Geneid", "adjSums"))
hubs <- lapply(hubs, function(df) {df[order(df$adjSums, decreasing=TRUE),]})
for (ModuleColor in names(hubs)) {
hubs[[ModuleColor]]<- dplyr::left_join(hubs[[ModuleColor]] %>%
mutate(Module_color = ModuleColor), module_size |> mutate(MODULE = paste("ME", Module, sep="")))}
for (ModuleColor in names(hubs)) {
if (Type != "SSU") {
hubs[[ModuleColor]] <- dplyr::left_join(hubs[[ModuleColor]],
ANNO[[paste("GeneAnno_", unique(hubs[[ModuleColor]]$MODULE), sep="")]])
}
if (Type == "SSU") {hubs[[ModuleColor]]<- hubs[[ModuleColor]]}
}
hubs_expr <- list()
for (ModuleColor in names(hubs)) {
hub_expr_length <- length(hubs_expr)
hubs_expr[[hub_expr_length+1]] <- dplyr::left_join(hubs[[ModuleColor]],
as.data.frame(t(omics_data[names(omics_data) %in%
hubs[[ModuleColor]]$Geneid])) %>% mutate(Geneid = rownames(.)))
names(hubs_expr)[[hub_expr_length+1]] <- unique(hubs[[ModuleColor]]$MODULE)
}
##########################
#Filter HubsGenes for TPM#
##########################
if (Type != "SSU") {
#print("tpm")
#print(dim(TPM))
rownames(TPM) <-gsub("[[:punct:]]+", ".", rownames(TPM)) #replace special characters in gene names by dot
keep <- rowSums(TPM > TPM_value) >= 3
TPMFilter<- TPM[keep,]
print(dim(TPMFilter))
for (MODULE in names(hubs_expr)) {
#print(paste(MODULE, paste("Genes before TPM", TPM_value, "> 3 Filter"), dim(hubs_expr[[MODULE]])[1]))
hubs_expr[[MODULE]] <- hubs_expr[[MODULE]][hubs_expr[[MODULE]]$Geneid %in% rownames(TPMFilter),]
#print(paste(MODULE, paste("Genes after TPM", TPM_value, "> 3 Filter"), dim(hubs_expr[[MODULE]])[1]))
}
}
if (Type == "SSU") { hubs_expr[[MODULE]] <- hubs_expr[[MODULE]]
}
Hub_list[[Hub_list_length+1]] <- hubs_expr
names(Hub_list)[[Hub_list_length+1]] <- paste("hubs_expr", Tissue, Type, sep="_")
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "SSU_Gill_WGCNA"
## [1] 6
## [1] "RNA_Gill_WGCNA"
## [1] 6
## [1] 20551 21
## [1] "RNA_Liver_WGCNA"
## [1] 3
## [1] 15590 22
###########
#Save Data#
###########
saveRDS(Hub_list, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "Hub_list", Date, sep="_"), ".rds", sep=""))))
Hub_list <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "Hub_list", Date, sep="_"), ".rds", sep=""))))
################
#Visualize HUBS#
################
ModuleSelection <- c(1, 5, 6, 37)
ModuleSelection <- paste(Type, ModuleSelection, sep="")
hubs_decreasing <- lapply(Hub_list$hubs_expr_Gill_RNA, function(df) {df[order(df$adjSums, decreasing=TRUE),]})
hubs_decreasing_head <- lapply(hubs_decreasing , function(x) head(x, 20))
hubs_decreasing_head$ME1[c("GeneSymbolHS", "Geneid", "adjSums")]
## GeneSymbolHS Geneid adjSums
## 1 TMEM204 tmem204 1273.984
## 2 EHD2 ehd2b 1263.441
## 3 PTK2 ptk2aa 1247.722
## 4 MPDZ LOC116039889 1238.925
## 5 OLFML2A olfml2a 1221.201
## 6 MPRIP LOC116063952 1212.292
## 7 PTPRD ptprdb 1211.198
## 8 CPN1 cpn1 1206.305
## 9 <NA> si.dkey.12l12.1 1202.459
## 10 PLA2R1 pla2r1 1200.137
## 11 NGFR ngfrb 1198.737
## 12 SHROOM4 shroom4 1192.123
## 13 FBN1 fbn1 1186.944
## 14 LOC118493121 LOC118493121 1186.422
## 15 LAMA4 lama4 1185.479
## 16 MMP14 mmp14a 1183.329
## 17 RASIP1 rasip1 1178.368
## 18 KERA LOC116066877 1177.769
## 19 SSPN sspn 1176.748
## 20 ANPEP LOC116057257 1176.711
hubs_decreasing_head$ME5$GeneSymbolHS
## [1] "EIF4G1" "ACBD3" "ABCF2" "CLINT1" "TMED2" "OXSR1" "GSPT1"
## [8] "CLPTM1" "TIMM23" NA "MMGT1" "GSK3B" "ATP13A1" "TXNRD3"
## [15] "ERGIC1" "SLC31A1" "SEC24A" "HNRNPL" "DNAJA2" "SEC61A1"
hubs_decreasing_head <- plyr::ldply (hubs_decreasing_head, data.frame)
hubs_decreasing_head$Module <- paste(Type, hubs_decreasing_head$Module, sep="")
hubs_decreasing_head_df<- hubs_decreasing_head %>% filter(.$Module %in% ModuleSelection)
hubs_decreasing_head_df <- data.frame(hubs_decreasing_head_df %>% pivot_longer(-c(".id","Geneid","adjSums","Module_color","Module", "Size","MODULE","GeneSymbolHS", "ENTREZID")))
hubs_decreasing_head_df$Sample_Order <- factor(hubs_decreasing_head_df$name, level=rownames(SAM))
hubs_decreasing_head_df <- mutate(hubs_decreasing_head_df, Label = ifelse(name == "SLSU21MLEB6", GeneSymbolHS, NA))
hubs_decreasing_head_df$ModuleSelection <- factor(hubs_decreasing_head_df$Module, level=ModuleSelection)
hubs_decreasing_head_df %>%
ggplot(., aes(x=Sample_Order, y=value, group=GeneSymbolHS)) +
geom_line(aes(color = ModuleSelection),alpha = 0.5) +
#ggrepel::geom_text_repel(aes(label = Label),segment.color = 'transparent',
# direction = "y", size = 3,fontface = 'bold', box.padding = 2,force = .8) +
ggrepel::geom_text_repel(aes(label = Label), #segment.color = 'transparent',
nudge_x = .15,
box.padding = 0.5,
#size = 3, fontface = 'bold',
nudge_y = 1,
segment.curvature = -0.1,
segment.ncp = 3,
segment.angle = 20) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90))+
facet_grid (rows = vars(ModuleSelection)) +
labs(x = "Sample",y = "normalized expression") +
scale_colour_manual(values = levels(factor(hubs_decreasing_head_df$Module_color))) +
theme(
axis.title.x.bottom = element_text(size=10,face = "bold"),
axis.title.y.left = element_text(size=10,face = "bold"),
strip.text.x = element_text(face = "bold"),
strip.text.x.bottom = element_text(size = 10,face = "bold"),
#strip.text.y.right = element_text(angle = 0, size=rel(0.8)),
strip.text.y.left = element_text(size = 10,face = "bold"),
axis.text.x.bottom = element_text(face = "bold", angle = 45, hjust = 1),
axis.text.y.left = element_text(face = "bold"),
#axis.text.y.left = element_text(size=rel(1)),
legend.title = element_text( size = 8),
legend.text = element_text(size = 8),
#legend.key.size = unit(0.4, 'cm'),
plot.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 9),
plot.caption = element_text(size = 9), panel.background = element_blank())
SLUCGeneManual <- read.csv2(file.path(path_Input_RNA, "SLUCGene-Manual-17.06.2023.csv"))[-1]
for (WGCNAset in names(ORA_list)) {
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
Analysis <- sub(paste0(".*", "(WGCNA)", ".*"), "\\1", WGCNAset)
Corr_Filter <- sub(paste0(".*", "(Corr_Filter....)", ".*"), "\\1", WGCNAset)
Corr_Filter <- sub("Corr_Filter_", "", Corr_Filter)
GeneAnno <- WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]][grepl("GeneAnno",
names(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]))][[1]]
HUBS <- Hub_list[[paste("hubs_expr", Tissue, Type, sep="_")]]
WGCNAset_list <- ORA_list[[WGCNAset]]
for (i in names(WGCNAset_list)[grepl("SLIMKEGG", names(WGCNAset_list))]) {
require(org.Hs.eg.db)
SLIMKEGG <- WGCNAset_list[[i]]
#Just to add in readbale geneSymbols for the EntrezID
for (ii in names(SLIMKEGG)) {
MODULE <- sub("slim_kegg_", "", ii)
ANNO <- GeneAnno[[which(sub("GeneAnno_", "", names(GeneAnno)) %in% MODULE)]]
HUB <- HUBS[[which(names(HUBS) %in% MODULE)]]
df_Keggs <- as.data.frame(SLIMKEGG[[ii]])
geneID_split <- str_split(df_Keggs$geneID, "/")
GeneSymbols <- character(length(geneID_split))
all_genes <- unlist(geneID_split)
gene_counts <- table(all_genes)
gene_counts_df <- as.data.frame(gene_counts)
colnames(gene_counts_df) <- c("ENTREZID", "CountinPathways")
gene_counts_df <- gene_counts_df[order(-gene_counts_df$Count), ]
gene_counts_df <- left_join(gene_counts_df, bind_rows(GeneAnno))
gene_counts_df <- gene_counts_df[!duplicated(gene_counts_df$ENTREZID),]
gene_counts_df <- left_join(gene_counts_df, HUB)
head(df_Keggs[, -which(colnames(df_Keggs) %in% c("GeneID", "Symbol"))])
A <- gene_counts_df[c("ENTREZID","Geneid", "GeneSymbolHS", "adjSums")]
B <- df_Keggs[c("Description", "geneID", "category", "subcategory")]
colnames(B) <- c("Description", "ENTREZID", "category", "subcategory")
B_long <- B %>%
separate_rows(ENTREZID, sep = "/")
C <- merge(A, B_long)
C <- C[C$category != "Human Diseases",]
CC <- C %>%
dplyr::arrange(desc(adjSums)) %>% # Order by adjSums in descending order
dplyr::distinct(GeneSymbolHS, .keep_all = TRUE) %>% # Keep distinct GeneID values
dplyr::slice(1:50)
CCC <- C[C$GeneSymbolHS %in% CC$GeneSymbolHS,]
CCC$GeneCombo <- paste(CCC$GeneSymbolHS, CCC$Geneid, sep="-")
CCC <- CCC %>% arrange(desc(adjSums))
# write.csv2(CCC, file = paste0(file.path(path_Output_WGCNA,
# paste(paste("Hubs", Analysis, Tissue, Type, i, "Corr_filter", Corr_Filter, ii, sep="_"),
# "csv", sep="."))))
HUB_C <- merge(HUB, C, all=T)
HUB_CC <- HUB_C %>%
dplyr::arrange(desc(adjSums)) %>% # Order by adjSums in descending order
dplyr::distinct(GeneSymbolHS, .keep_all = TRUE) %>% # Keep distinct GeneID values
dplyr::slice(1:100)
HUB_CCC <- HUB_C[HUB_C$GeneSymbolHS %in% HUB_CC$GeneSymbolHS,]
HUB_CCC$GeneCombo <- paste(HUB_CCC$GeneSymbolHS, HUB_CCC$Geneid, sep="-")
HUB_CCC_C <- HUB_CCC %>% relocate(GeneCombo, Description)
HUB_CCC_C <- merge(HUB_CCC_C, SLUCGeneManual[c("rowname", "GENENAME")] %>% mutate(Geneid = rowname))
HUB_CCC_C <- HUB_CCC_C %>%
dplyr::arrange(desc(adjSums))
# write.csv2(HUB_CCC_C, file = paste0(file.path(path_Output_WGCNA,
# paste(paste(Species, Year, Season, Tissue, Type, Analysis, "Corr_filter", Corr_Filter, "Hubgene_Order", sub("ME", "RNA", ii), sep="_"), ".csv", sep=""))))
HubHeatPlot <- CCC %>% ggplot(., aes(x=GeneCombo, y=Description, fill=adjSums)) +
geom_tile() + scale_fill_gradient2(name = "Connectivity", low = "#327eba",
mid = "white", high = "#e06663") +
theme(axis.text.x=element_text(angle = 60, hjust = 1)) +
atheme +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
panel.grid.major = element_line(colour = "grey40"),
panel.grid.minor = element_line(colour = "grey40")
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold")) +
ggtitle(paste(Tissue, sub("ME", "RNA", MODULE)))
##################
#Plot a selection#
##################
if (Type == "RNA" & Tissue == "Gill" & MODULE %in% c("ME2", "ME7", "ME11")) {
plot(HubHeatPlot)
} else if (Tissue == "Liver" & MODULE %in% c("ME1", "ME3")) {
plot(HubHeatPlot)
} else if (Type == "SSU" & MODULE %in% c("ME1", "ME2", "ME3")) {
plot(HubHeatPlot)
} else {
# print("Plots are saved to the pathPlots")
}
ggsave(HubHeatPlot, filename = paste(paste(Species, Year, Season, Tissue, Type, Analysis, "HubHeatPlot", ii, sep="_"), ".png", sep=""), path = pathPlots ,
device='png', dpi=300, width = 12,height = 8)
}
}
}
https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-559
Category 3: Functions for module and gene selection Finding biologically or clinically significant modules and genes is a major goal of many co-expression analyses. The definition of biological or clinical significance depends on the research question under consideration. Abstractly speaking, we define a gene significance measure as a function GS that assigns a non-negative number to each gene; the higher GS i the more biologically significant is gene i. In functional enrichment analysis, a gene significance measure could indicate pathway membership. In gene knock-out experiments, gene significance could indicate knock-out essentiality. A microarray sample trait T can be used to define a trait-based gene significance measure as the absolute correlation between the trait and the expression profiles, Equation 2. A measure of module significance can be defined as average gene significance across the module genes (Figure 3A). When dealing with a sample trait T, a measure of statistical significance between the module eigengene E and the trait T can be defined, for example, using correlation (Equation 2) or a p-value (Equation 3) obtained from a univariate regression model between E and T. Modules with high trait significance may represent pathways associated with the sample trait. Genes with high module membership in modules related to traits (Figure 3B) are natural candidates for further validation [10, 14, 15, 18].
Corr_Filter <- 0.8
ORA_list <- list()
ORA_list$Gill_RNA_WGCNA_MODULES_Corr_Filter_0.8 <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ORA_list_Gill", Corr_Filter, Date, sep="_"), ".rds", sep=""))))
ORA_list$Liver_RNA_WGCNA_MODULES_Corr_Filter_0.8 <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ORA_list_Liver", Corr_Filter, Date, sep="_"), ".rds", sep=""))))
WGCNA_list <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, Date, sep="_"), ".rds", sep=""))))
SLUCGeneManual <- read.csv2(file.path(path_Input_RNA, "SLUCGene-Manual-17.06.2023.csv"))[-1]
scatter_list <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "scatter_list", Date, sep="_"), ".rds", sep=""))))
#TPM Data
tpms <- list("tpmGill "= tpm_RNA_Gill, "tpmLiver" = tpm_RNA_Liver)
TPM_value <- 1
Hub_list <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "Hub_list", Date, sep="_"), ".rds", sep=""))))
ModsOfInterst_list <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ModsOfInterst_list", Date, sep="_"), ".rds", sep=""))))
HUB_GS_List <- list()
for (WGCNAset in names(ORA_list)) {
HUB_GS_List_length <- length(HUB_GS_List)
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
Analysis <- sub(paste0(".*", "(WGCNA)", ".*"), "\\1", WGCNAset)
Corr_Filter <- sub(paste0(".*", "(Corr_Filter....)", ".*"), "\\1", WGCNAset)
Corr_Filter <- sub("Corr_Filter_", "", Corr_Filter)
GeneAnno <- WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]][grepl("GeneAnno",
names(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]))][[1]]
Scatter <- scatter_list[[paste("Scatter", Type, Tissue, sep="_")]]
HUBS <- Hub_list[[paste("hubs_expr", Tissue, Type, sep="_")]]
WGCNA_Ora_list <- ORA_list[[WGCNAset]]
WGCNAset_list <- WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]
ModsOfInterst <- ModsOfInterst_list[[paste("ModsOfInterest", Tissue, Type, sep="_")]]
omics_data <- as.data.frame(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]][grepl("omics_data",names(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]))][[1]])
MEs <- as.data.frame(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]][grepl("MEs", names(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]))][[1]])
SAM <- as.data.frame(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]][grepl("datTraits", names(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]))][[1]])
Samples <- rownames(SAM)
TPM <- tpms[[names(tpms)[grepl(Tissue, names(tpms))]]]
TPM <- TPM[names(TPM) %in% rownames(SAM)]
WGCNAset_list <- WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]
WGCNA_Ora_list <- ORA_list[[WGCNAset]]
SLIMKEGG <- WGCNA_Ora_list$SLIMKEGG
#There is the Pathway "Efferocytosis" enriched in Gill-RNA33 but the pathway lacks propper
#description in the Slims, so we delete it
filter_rows <- function(df) {
df[!df$Description %in% "Efferocytosis", ]}
SLIMKEGG <- lapply(SLIMKEGG, filter_rows)
not_empty <- function(df) {
nrow(df) > 0 && ncol(df) > 0}
SLIMKEGG <- Filter(not_empty, SLIMKEGG)
not_in <- function(x, y) { return(!(x %in% y))}
HUB_GS_MEs_List <- list()
for (MODULE in names(HUBS)) {
print(MODULE)
if (MODULE %in% names(HUBS)[names(HUBS) %in% sub("slim_kegg_", "", names(SLIMKEGG))]) {
require(org.Hs.eg.db)
HUB_GS_MEs_List_length <- length( HUB_GS_MEs_List)
ANNO <- GeneAnno[[which(sub("GeneAnno_", "", names(GeneAnno)) %in% MODULE)]]
HUB <- HUBS[[which(names(HUBS) %in% MODULE)]][1:8]
df_Keggs <- as.data.frame(SLIMKEGG[[paste("slim_kegg_", MODULE, sep="")]])
df_Keggs <- df_Keggs[df_Keggs$category != "Human Diseases", ]
geneID_split <- str_split(df_Keggs$geneID, "/")
GeneSymbols <- character(length(geneID_split))
all_genes <- unlist(geneID_split)
gene_counts <- table(all_genes)
gene_counts_df <- as.data.frame(gene_counts)
colnames(gene_counts_df) <- c("ENTREZID", "CountinPathways")
gene_counts_df <- gene_counts_df[order(-gene_counts_df$Count), ]
gene_counts_df <- left_join(gene_counts_df, bind_rows(GeneAnno))
gene_counts_df <- gene_counts_df[!duplicated(gene_counts_df$ENTREZID),]
gene_counts_df <- left_join(gene_counts_df, HUB)
head(df_Keggs[, -which(colnames(df_Keggs) %in% c("GeneID", "Symbol"))])
A <- gene_counts_df[c("ENTREZID","Geneid", "GeneSymbolHS", "adjSums")]
B <- df_Keggs[c("Description", "p.adjust", "geneID", "category", "subcategory")]
colnames(B) <- c("Description", "p.adjust", "ENTREZID", "category", "subcategory")
B_long <- B %>%
separate_rows(ENTREZID, sep = "/")
C <- merge(A, B_long)
C <- C[C$category != "Human Diseases",]
C_combined <- C %>%
dplyr::group_by(Geneid) %>%
dplyr::summarize(Description = paste(Description, collapse = "/"),
p.adjust = paste(p.adjust, collapse = "/"),
category = paste(category, collapse = "/"),
subcategory = paste(subcategory, collapse = "/"))
C_combined <- left_join(A, C_combined)
HUB_C <- merge(HUB, C_combined, all=T)
C_combined[!C_combined$Geneid %in% HUB$Geneid,]
#These are genes where no adjSums was calcualted if not present in all Samples
HUB_C$GeneCombo <- paste(HUB_C$GeneSymbolHS, HUB_C$Geneid, sep="-")
HUB_C <- HUB_C %>% relocate(GeneCombo, Description)
HUB_C_ANNO <- left_join(HUB_C, SLUCGeneManual[c("rowname", "GENENAME")] %>% mutate(Geneid = rowname))
Scatter_AllTraits <- do.call(rbind.data.frame, Scatter)
rownames(Scatter_AllTraits) <- NULL
Scatter_AllTraits_ME <- Scatter_AllTraits[Scatter_AllTraits$Modules %in%
paste("RNA", sub("ME", "", MODULE), sep=""),]
Scatter_AllTraits_ME <- Scatter_AllTraits_ME[-which( grepl("MMME", names(Scatter_AllTraits_ME)))]
Scatter_AllTraits_ME_split <- split(Scatter_AllTraits_ME, Scatter_AllTraits_ME$TraitOfInterest)
Scatter_AllTraits_ME2 <- Scatter_AllTraits_ME[c("Geneid", "TraitOfInterest", "GS.", "p.GS.")]
Scatter_AllTraits_ME2<-Scatter_AllTraits_ME2 %>%
group_by(TraitOfInterest) %>%
filter(!duplicated(Geneid))
Scatter_AllTraits_ME_wide <- Scatter_AllTraits_ME2 %>%
pivot_wider(
id_cols = Geneid,
names_from = TraitOfInterest,
values_from = c(GS., p.GS.)) %>% as.data.frame()
Scatter_AllTraits_ME_wide<- left_join(Scatter_AllTraits_ME_wide, Scatter_AllTraits_ME[
c("Geneid", "ModuleMembership", "Module_color", "Modules")] %>%
filter(!duplicated(Geneid)))
HUB_C_ANNO_Scatter <- merge(HUB_C_ANNO, Scatter_AllTraits_ME_wide, all=T)
##########################
#Filter HubsGenes for TPM#
##########################
if (Type != "SSU") {
#print("tpm")
#print(dim(TPM))
rownames(TPM) <-gsub("[[:punct:]]+", ".", rownames(TPM))
#replace special characters in gene names by dot
keep <- rowSums(TPM > TPM_value) >= 3
TPMFilter<- TPM[keep,]
#print(dim(TPMFilter))
#print(paste(MODULE, paste("Genes before TPM", TPM_value, "> 3 Filter"), dim(HUB_C_ANNO_Scatter)[1]))
HUB_C_ANNO_Scatter <- HUB_C_ANNO_Scatter[HUB_C_ANNO_Scatter$Geneid %in% rownames(TPMFilter),]
#print(paste(MODULE, paste("Genes after TPM", TPM_value, "> 3 Filter"), dim(HUB_C_ANNO_Scatter)[1]))
}
if (Type == "SSU") { HUB_C_ANNO_Scatter <- HUB_C_ANNO_Scatter
}
HUB_C_ANNO_Scatter_omics <-
left_join(HUB_C_ANNO_Scatter,
as.data.frame(t(omics_data[names(omics_data) %in% HUB_C_ANNO_Scatter$Geneid])) %>%
mutate(Geneid = rownames(.)))
#########################
#Add in Ordered TPM data#
#names(TPMFilter)
TPMFilter <- TPMFilter %>%
dplyr::select(all_of(Samples))
TPMFilter2 <- TPMFilter
names(TPMFilter2) <- paste(names(TPMFilter2), "tpm", sep="")
HUB_C_ANNO_Scatter_omics <-
dplyr::left_join(HUB_C_ANNO_Scatter,
TPMFilter2[rownames(TPMFilter2) %in% HUB_C_ANNO_Scatter$Geneid,] %>% mutate(Geneid = rownames(.)))
HUB_C_ANNO_Scatter_omics <- HUB_C_ANNO_Scatter_omics %>%
dplyr::arrange(desc(adjSums))
#################################
#Filter for ModuleMembership 0.8#
HUB_C_ANNO_Scatter_omics_Filtered <- HUB_C_ANNO_Scatter_omics %>%
dplyr::filter(ModuleMembership > 0.8)
HUB_C_ANNO_Scatter_omics_Filtered <- HUB_C_ANNO_Scatter_omics_Filtered %>%
arrange(desc(adjSums))
write.csv2(HUB_C_ANNO_Scatter_omics_Filtered, file = paste0(file.path(path_Output_WGCNA,
paste(paste(Species, Year, Season, Tissue, Type, Analysis, "Hubs_GS_TPMfilter", "slimKeggs", "Corr_filter", Corr_Filter, sub("ME", "RNA", MODULE), sep="_"), "csv", sep="."))))
#print(paste("Wrote csv for", Type, Tissue, MODULE ))
HUB_GS_MEs_List[[HUB_GS_MEs_List_length+1]] <- HUB_C_ANNO_Scatter_omics
names(HUB_GS_MEs_List)[[HUB_GS_MEs_List_length+1]] <- MODULE
} else if (MODULE %in% names(HUBS)[not_in(names(HUBS), sub("slim_kegg_", "", names(SLIMKEGG)))]) {
###################################################################
#For Modules where ORA on KEGG did not lead to a pathway detectiom#
###################################################################
#print(paste(Tissue, Type, MODULE, "Did not have Pathway results"))
HUB_GS_MEs_List_length <- length(HUB_GS_MEs_List)
ANNO <- GeneAnno[[which(sub("GeneAnno_", "", names(GeneAnno)) %in% MODULE)]]
HUB <- HUBS[[which(names(HUBS) %in% MODULE)]][1:8]
HUB$GeneCombo <- paste(HUB$GeneSymbolHS, HUB$Geneid, sep="-")
HUB_ANNO <- left_join(HUB, SLUCGeneManual[c("rowname", "GENENAME")] %>% mutate(Geneid = rowname))
Scatter_AllTraits <- do.call(rbind.data.frame, Scatter)
rownames(Scatter_AllTraits) <- NULL
Scatter_AllTraits_ME <- Scatter_AllTraits[Scatter_AllTraits$Modules %in%
paste("RNA", sub("ME", "", MODULE), sep=""),]
Scatter_AllTraits_ME <- Scatter_AllTraits_ME[-which( grepl("MMME", names(Scatter_AllTraits_ME)))]
Scatter_AllTraits_ME_split <- split(Scatter_AllTraits_ME, Scatter_AllTraits_ME$TraitOfInterest)
Scatter_AllTraits_ME2 <- Scatter_AllTraits_ME[c("Geneid", "TraitOfInterest", "GS.", "p.GS.")]
Scatter_AllTraits_ME2<-Scatter_AllTraits_ME2 %>%
group_by(TraitOfInterest) %>%
filter(!duplicated(Geneid))
Scatter_AllTraits_ME_wide <- Scatter_AllTraits_ME2 %>%
pivot_wider(
id_cols = Geneid,
names_from = TraitOfInterest,
values_from = c(GS., p.GS.)) %>% as.data.frame()
Scatter_AllTraits_ME_wide<- left_join(Scatter_AllTraits_ME_wide, Scatter_AllTraits_ME[
c("Geneid", "ModuleMembership", "Module_color", "Modules")] %>%
filter(!duplicated(Geneid)))
HUB_ANNO_Scatter <- merge(HUB_ANNO, Scatter_AllTraits_ME_wide, all=T)
##########################
#Filter HubsGenes for TPM#
##########################
if (Type != "SSU") {
#print("tpm")
#print(dim(TPM))
rownames(TPM) <-gsub("[[:punct:]]+", ".", rownames(TPM)) #replace special characters in gene names by dot
keep <- rowSums(TPM > TPM_value) >= 3
TPMFilter<- TPM[keep,]
#print(dim(TPMFilter))
#print(paste(MODULE, paste("Genes before TPM", TPM_value, "> 3 Filter"), dim(HUB_ANNO_Scatter)[1]))
HUB_ANNO_Scatter <- HUB_ANNO_Scatter[HUB_ANNO_Scatter$Geneid %in% rownames(TPMFilter),]
#print(paste(MODULE, paste("Genes after TPM", TPM_value, "> 3 Filter"), dim(HUB_ANNO_Scatter)[1]))
}
if (Type == "SSU") { HUB_ANNO_Scatter <- HUB_ANNO_Scatter
}
HUB_ANNO_Scatter_omics <-
dplyr::left_join(HUB_ANNO_Scatter,
as.data.frame(t(omics_data[names(omics_data) %in% HUB_ANNO_Scatter$Geneid])) %>%
dplyr::mutate(Geneid = rownames(.)))
#########################
#Add in Ordered TPM data#
names(TPMFilter)
TPMFilter <- TPMFilter %>%
dplyr::select(all_of(Samples))
TPMFilter2 <- TPMFilter
names(TPMFilter2) <- paste(names(TPMFilter2), "tpm", sep="")
HUB_ANNO_Scatter_omics <-
dplyr::left_join(HUB_ANNO_Scatter,
TPMFilter2[rownames(TPMFilter2) %in% HUB_ANNO_Scatter$Geneid,] %>% mutate(Geneid = rownames(.)))
HUB_ANNO_Scatter_omics <- HUB_ANNO_Scatter_omics %>%
dplyr::arrange(desc(adjSums))
#################################
#Filter for ModuleMembership 0.8#
HUB_ANNO_Scatter_omics_Filtered <- HUB_ANNO_Scatter_omics %>%
dplyr::filter(ModuleMembership > 0.8)
HUB_ANNO_Scatter_omics_Filtered <- HUB_ANNO_Scatter_omics_Filtered %>%
arrange(desc(adjSums))
###########
#Write CSV#
write.csv2(HUB_ANNO_Scatter_omics_Filtered , file = paste0(file.path(path_Output_WGCNA,
paste(paste(Species, Year, Season, Tissue, Type, Analysis, "Hubs_GS_TPMfilter", "NoEnrich","Corr_filter", Corr_Filter, sub("ME", "RNA", MODULE), sep="_"), "csv", sep="."))))
#print(paste("Wrote csv for", Type, Tissue, MODULE, "No Enrichment"))
HUB_GS_MEs_List[[HUB_GS_MEs_List_length+1]] <- HUB_ANNO_Scatter_omics
names(HUB_GS_MEs_List)[[HUB_GS_MEs_List_length+1]] <- MODULE
}
HUB_GS_List[[HUB_GS_List_length+1]] <- HUB_GS_MEs_List
names(HUB_GS_List)[[HUB_GS_List_length+1]] <- paste("HUB_GS", Tissue, Type, Analysis, sep="_")
}
}
## [1] "ME7"
## [1] "ME2"
## [1] "ME3"
## [1] "ME14"
## [1] "ME22"
## [1] "ME24"
## [1] "ME34"
## [1] "ME33"
## [1] "ME26"
## [1] "ME21"
## [1] "ME23"
## [1] "ME5"
## [1] "ME11"
## [1] "ME0"
## [1] "ME17"
## [1] "ME16"
## [1] "ME18"
## [1] "ME19"
## [1] "ME9"
## [1] "ME15"
## [1] "ME25"
## [1] "ME31"
## [1] "ME8"
## [1] "ME38"
## [1] "ME10"
## [1] "ME6"
## [1] "ME20"
## [1] "ME29"
## [1] "ME13"
## [1] "ME35"
## [1] "ME28"
## [1] "ME37"
## [1] "ME30"
## [1] "ME12"
## [1] "ME1"
## [1] "ME32"
## [1] "ME27"
## [1] "ME4"
## [1] "ME36"
## [1] "ME7"
## [1] "ME2"
## [1] "ME3"
## [1] "ME14"
## [1] "ME22"
## [1] "ME24"
## [1] "ME34"
## [1] "ME33"
## [1] "ME26"
## [1] "ME21"
## [1] "ME23"
## [1] "ME5"
## [1] "ME11"
## [1] "ME0"
## [1] "ME17"
## [1] "ME16"
## [1] "ME18"
## [1] "ME19"
## [1] "ME9"
## [1] "ME15"
## [1] "ME25"
## [1] "ME39"
## [1] "ME31"
## [1] "ME8"
## [1] "ME38"
## [1] "ME10"
## [1] "ME6"
## [1] "ME20"
## [1] "ME29"
## [1] "ME13"
## [1] "ME35"
## [1] "ME28"
## [1] "ME37"
## [1] "ME30"
## [1] "ME12"
## [1] "ME1"
## [1] "ME32"
## [1] "ME27"
## [1] "ME4"
## [1] "ME36"
###########
#Save Data#
###########
saveRDS(HUB_GS_List, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "HUB_GeneSig", Date, sep="_"), ".rds", sep=""))))
HUB_GS_List <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "HUB_GeneSig", Date, sep="_"), ".rds", sep=""))))
##############
#Module stats#
##############
# result_df<- data.frame(GenesPerModule = unlist(lapply(HUB_GS_List$HUB_GS_Gill_RNA_WGCNA, function(x) dim(x)[1])))
# result_df[order(result_df$GenesPerModule, decreasing = TRUE), , drop=FALSE]
#
# for (ME in names(HUB_GS_List$HUB_GS_Gill_RNA_WGCNA)) {
# MEset <- HUB_GS_List$HUB_GS_Gill_RNA_WGCNA[[ME]]
# MEset <- MEset %>%
# dplyr::filter(ModuleMembership > 0.8)
# print(paste(ME, dim(MEset)[1]))}
#
# ModsOfInterst_list$ModsOfInterest_Gill_RNA$O2
##########
#Filtered#
##########
#Filtered for significant correlation to one of the physiological/abiotic traits
HUB_GS_List_Filtered <- list()
for (WGCNAset in names(ORA_list)) {
HUB_GS_List_Filtered_length <- length(HUB_GS_List_Filtered)
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
Analysis <- sub(paste0(".*", "(WGCNA)", ".*"), "\\1", WGCNAset)
Corr_Filter <- sub(paste0(".*", "(Corr_Filter....)", ".*"), "\\1", WGCNAset)
Corr_Filter <- sub("Corr_Filter_", "", Corr_Filter)
GeneAnno <- WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]][grepl("GeneAnno",
names(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]))][[1]]
Scatter <- scatter_list[[paste("Scatter", Type, Tissue, sep="_")]]
HUBS <- Hub_list[[paste("hubs_expr", Tissue, Type, sep="_")]]
WGCNAset_list <- ORA_list[[WGCNAset]]
ModsOfInterst <- ModsOfInterst_list[[paste("ModsOfInterest", Tissue, Type, sep="_")]]
omics_data <- as.data.frame(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]][grepl("omics_data", names(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]))][[1]])
MEs <- as.data.frame(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]][grepl("MEs", names(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]))][[1]])
SAM <- as.data.frame(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]][grepl("datTraits", names(WGCNA_list[[paste(Type, Tissue, Analysis, sep="_")]]))][[1]])
TPM <- tpms[[names(tpms)[grepl(Tissue, names(tpms))]]]
TPM <- TPM[names(TPM) %in% rownames(SAM)]
for (slimKeggs in names(WGCNAset_list)[grepl("SLIMKEGG", names(WGCNAset_list))]) {
require(org.Hs.eg.db)
SLIMKEGG <- WGCNAset_list[[slimKeggs]]
#Just to add in readbale geneSymbols for the EntrezID
HUB_GS_MEs_Filtered_List <- list()
for (slimKegg in names(SLIMKEGG)) {
HUB_GS_MEs_Filtered_List_length <- length(HUB_GS_MEs_Filtered_List)
MODULE <- sub("slim_kegg_", "", slimKegg)
ANNO <- GeneAnno[[which(sub("GeneAnno_", "", names(GeneAnno)) %in% MODULE)]]
HUB <- HUBS[[which(names(HUBS) %in% MODULE)]][1:8]
df_Keggs <- as.data.frame(SLIMKEGG[[slimKegg]])
geneID_split <- str_split(df_Keggs$geneID, "/")
GeneSymbols <- character(length(geneID_split))
all_genes <- unlist(geneID_split)
gene_counts <- table(all_genes)
gene_counts_df <- as.data.frame(gene_counts)
colnames(gene_counts_df) <- c("ENTREZID", "CountinPathways")
gene_counts_df <- gene_counts_df[order(-gene_counts_df$Count), ]
gene_counts_df <- left_join(gene_counts_df, bind_rows(GeneAnno))
gene_counts_df <- gene_counts_df[!duplicated(gene_counts_df$ENTREZID),]
gene_counts_df <- left_join(gene_counts_df, HUB)
head(df_Keggs[, -which(colnames(df_Keggs) %in% c("GeneID", "Symbol"))])
A <- gene_counts_df[c("ENTREZID","Geneid", "GeneSymbolHS", "adjSums")]
B <- df_Keggs[c("Description", "p.adjust", "geneID", "category", "subcategory")]
colnames(B) <- c("Description", "p.adjust", "ENTREZID", "category", "subcategory")
B_long <- B %>%
separate_rows(ENTREZID, sep = "/")
C <- merge(A, B_long)
C <- C[C$category != "Human Diseases",]
C_combined <- C %>%
dplyr::group_by(Geneid) %>%
dplyr::summarize(Description = paste(Description, collapse = "/"),
p.adjust = paste(p.adjust, collapse = "/"),
category = paste(category, collapse = "/"),
subcategory = paste(subcategory, collapse = "/")
)
C_combined <- left_join(A, C_combined)
HUB_C <- merge(HUB, C_combined, all=T)
C_combined[!C_combined$Geneid %in% HUB$Geneid,]
#These are genes where no adjSums was calcualted if not present in all Samples
HUB_C$GeneCombo <- paste(HUB_C$GeneSymbolHS, HUB_C$Geneid, sep="-")
HUB_C <- HUB_C %>% relocate(GeneCombo, Description)
HUB_C_ANNO <- left_join(HUB_C, SLUCGeneManual[c("rowname", "GENENAME")] %>% mutate(Geneid = rowname))
Scatter_AllTraits <- do.call(rbind.data.frame, Scatter)
rownames(Scatter_AllTraits) <- NULL
Scatter_AllTraits_ME <- Scatter_AllTraits[Scatter_AllTraits$Modules %in%
paste("RNA", sub("ME", "", MODULE), sep=""),]
Scatter_AllTraits_ME <- Scatter_AllTraits_ME[-which( grepl("MMME", names(Scatter_AllTraits_ME)))]
Scatter_AllTraits_ME_split <- split(Scatter_AllTraits_ME, Scatter_AllTraits_ME$TraitOfInterest)
Scatter_AllTraits_ME2 <- Scatter_AllTraits_ME[c("Geneid", "TraitOfInterest", "GS.", "p.GS.")]
Scatter_AllTraits_ME2<-Scatter_AllTraits_ME2 %>%
group_by(TraitOfInterest) %>%
filter(!duplicated(Geneid))
Scatter_AllTraits_ME_wide <- Scatter_AllTraits_ME2 %>%
pivot_wider(
id_cols = Geneid,
names_from = TraitOfInterest,
values_from = c(GS., p.GS.)) %>% as.data.frame()
Scatter_AllTraits_ME_wide<- left_join(Scatter_AllTraits_ME_wide, Scatter_AllTraits_ME[
c("Geneid", "ModuleMembership", "Module_color", "Modules")] %>%
filter(!duplicated(Geneid)))
HUB_C_ANNO_Scatter <- merge(HUB_C_ANNO, Scatter_AllTraits_ME_wide, all=T)
HUB_C_ANNO_Scatter <- HUB_C_ANNO_Scatter %>%
arrange(desc(adjSums))
##########################
#Filter HubsGenes for TPM#
##########################
if (Type != "SSU") {
#print("tpm")
#print(dim(TPM))
rownames(TPM) <-gsub("[[:punct:]]+", ".", rownames(TPM)) #replace special characters in gene names by dot
keep <- rowSums(TPM > TPM_value) >= 3
TPMFilter <- TPM[keep,]
print(dim(TPMFilter))
#print(paste(MODULE, paste("Genes before TPM", TPM_value, "> 3 Filter"), dim(HUB_C_ANNO_Scatter)[1]))
HUB_C_ANNO_Scatter <- HUB_C_ANNO_Scatter[HUB_C_ANNO_Scatter$Geneid %in% rownames(TPMFilter),]
#print(paste(MODULE, paste("Genes after TPM", TPM_value, "> 3 Filter"), dim(HUB_C_ANNO_Scatter)[1]))
}
if (Type == "SSU") { HUB_C_ANNO_Scatter <- HUB_C_ANNO_Scatter
}
HUB_C_ANNO_Scatter <- HUB_C_ANNO_Scatter %>%
arrange(desc(adjSums))
HUB_C_ANNO_Scatter_omics <-
left_join(HUB_C_ANNO_Scatter,
as.data.frame(t(omics_data[names(omics_data) %in% HUB_C_ANNO_Scatter$Geneid])) %>%
mutate(Geneid = rownames(.)))
names(TPMFilter)
TPMFilter2 <- TPMFilter
names(TPMFilter2) <- paste(names(TPMFilter2), "tpm", sep="")
HUB_C_ANNO_Scatter_omics <-
left_join(HUB_C_ANNO_Scatter,
TPMFilter2[rownames(TPMFilter2) %in% HUB_C_ANNO_Scatter$Geneid,] %>% mutate(Geneid = rownames(.)))
#########################################
#Filter for MM > 0.8 and TraitCorr > 0.3#
#########################################
GS_Geneid <- list()
for (GS in names(HUB_C_ANNO_Scatter_omics)[grepl("GS.", names(HUB_C_ANNO_Scatter_omics))][names(HUB_C_ANNO_Scatter_omics)[grepl("GS.", names(HUB_C_ANNO_Scatter_omics))] !=
names(HUB_C_ANNO_Scatter_omics)[grepl("p.GS.", names(HUB_C_ANNO_Scatter_omics))]]) {
GS_Geneid_length <- length(GS_Geneid)
GS_Geneid[[GS_Geneid_length+1]] <- na.omit(HUB_C_ANNO_Scatter_omics[abs(HUB_C_ANNO_Scatter_omics[[GS]]) > 0.3, ]$Geneid)
names(GS_Geneid)[[GS_Geneid_length+1]] <- GS
}
GS_Geneid_rbind<- unique(unlist(GS_Geneid))
HUB_C_ANNO_Scatter_omics_Filtered <- HUB_C_ANNO_Scatter_omics %>%
dplyr::filter(ModuleMembership > 0.8, Geneid %in% GS_Geneid_rbind)
HUB_C_ANNO_Scatter_omics_Filtered <- HUB_C_ANNO_Scatter_omics_Filtered %>%
arrange(desc(adjSums))
#write.csv2(HUB_C_ANNO_Scatter_omics_Filtered, file = paste0(file.path(path_Output_WGCNA,
# paste(paste("Hubs_GS_Filtered", Analysis, Tissue, Type, slimKeggs, "Corr_filter", Corr_Filter,
# MODULE, sep="_"), "csv", sep="."))))
# HUB_C_ANNO_Scatter_omics_Filtered_long <- as.data.frame(HUB_C_ANNO_Scatter_omics_Filtered %>%
# separate_rows(Description, p.adjust, category, subcategory, sep = "/"))
#
# Description_10 <- as.data.frame(HUB_C_ANNO_Scatter_omics_Filtered_long %>%
# distinct(Description, .keep_all = TRUE) %>%
# arrange(as.numeric(p.adjust)) %>%
# slice(1:10))$Description
#
# HUB_C_ANNO_Scatter_omics_Filtered_long <-
# HUB_C_ANNO_Scatter_omics_Filtered_long[HUB_C_ANNO_Scatter_omics_Filtered_long$Description %in%
# c(Description_10, NA), ]
#
# HUB_C_ANNO_Scatter_omics_Filtered_long <-
# HUB_C_ANNO_Scatter_omics_Filtered_long[!is.na(HUB_C_ANNO_Scatter_omics_Filtered_long$Description),]
#
#
#
#
# HubHeatPlot <- HUB_C_ANNO_Scatter_omics_Filtered_long %>% ggplot(., aes(x=GeneCombo, y=Description,
# fill=GS._O2)) +
# geom_tile() + scale_fill_gradient2(name = "Connectivity", low = "#327eba",
# mid = "white", high = "#e06663") +
# theme(axis.text.x=element_text(angle = 60, hjust = 1)) +
#
# #guides(color = guide_legend(title = "-log2(p_adjust)"), size = guide_legend(title = "-log2(p_adjust)")) +
# #xlab("\n\nAhead AADT") +
# #theme_minimal() +
# atheme +
# #theme(strip.text.y = element_text(angle = 0)) +
# #theme(#axis.text.x=element_blank(),
# #axis.ticks.x=element_blank(),
# #axis.text.y=element_blank(),
# #axis.title.y.left = element_blank(),
# #axis.ticks.y =element_blank()
# #axis.title.x = element_blank()
# #) +
# theme(
# panel.background = element_rect(fill='transparent'), #transparent panel bg
# plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
# #panel.grid.major = element_blank(), #remove major gridlines
# #panel.grid.minor = element_blank(), #remove minor gridlines
# #legend.background = element_rect(fill='transparent'), #transparent legend bg
# #legend.box.background = element_rect(fill='transparent'), #transparent legend panel
# panel.grid.major = element_line(colour = "grey40"),
# panel.grid.minor = element_line(colour = "grey40")
# ) +
# theme(axis.title.x.bottom = element_text(color="grey13"),
# strip.text = element_text(color = "black", face= "bold"))
#
# ggsave(HubHeatPlot, filename = paste("HubHeatPlot",
# Species, Tissue, Type, ii, sep="_"), path = pathPlots ,
# device='png', dpi=300, width = 12,height = 8)
HUB_GS_MEs_Filtered_List[[HUB_GS_MEs_Filtered_List_length+1]] <- HUB_C_ANNO_Scatter_omics_Filtered
names(HUB_GS_MEs_Filtered_List)[[HUB_GS_MEs_Filtered_List_length+1]] <- MODULE
}
}
HUB_GS_List_Filtered[[HUB_GS_List_Filtered_length+1]] <- HUB_GS_MEs_Filtered_List
names(HUB_GS_List_Filtered)[[HUB_GS_List_Filtered_length+1]] <- paste("HUB_GS", Tissue, Type, Analysis, sep="_")
}
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 20551 21
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
## [1] 15590 22
###########
#Save Data#
###########
saveRDS(HUB_GS_List_Filtered, file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "HUB_GeneSig_Filtered", Date, sep="_"), ".rds", sep=""))))
HUB_GS_List_Filtered <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "HUB_GeneSig_Filtered", Date, sep="_"), ".rds", sep=""))))
# HUB_GS_List_Filtered_Plots <- list()
# HUB_GS_List_Filtered_DFs <- list()
# for (WGCNAset in names(HUB_GS_List)) {
#
# Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
# Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
# Analysis <- sub(paste0(".*", "(WGCNA)", ".*"), "\\1", WGCNAset)
# WGCNASET<- HUB_GS_List[[WGCNAset]]
# ModsOfInterst<- ModsOfInterst_list[[paste("ModsOfInterest", Tissue, Type, sep="_")]]
#
# for (Trait in names(ModsOfInterst)) {
# for (MODULE in names(WGCNASET[names(WGCNASET) %in% rownames(ModsOfInterst[[Trait]])])) {
# tryCatch({
# HUB_GS_List_Filtered_Plots_length <- length(HUB_GS_List_Filtered_Plots)
# HUB_GS_List_Filtered_DFs_length <- length(HUB_GS_List_Filtered_DFs)
#
# TraitGS <- paste("GS._", Trait, sep="")
# MEset <- WGCNASET[[MODULE]]
# MEsetGenes <- na.omit(MEset[abs(MEset[[TraitGS]]) > 0.3, ]$Geneid)
#
# MEset <- MEset %>%
# dplyr::filter(ModuleMembership > 0.8, Geneid %in% MEsetGenes)
#
# if ("Description" %in% colnames(MEset)) {
# MEset_long <- as.data.frame(MEset %>%
# separate_rows(Description, p.adjust, category, subcategory, sep = "/"))
# } else {
# MEset_long <- as.data.frame(MEset)
# }
#
# #########################################################################################
# #Select Top 5 Genes for TraitSignificance and Connectivity Aside from Pathway Membership#
# Genes_Top_Trait<- as.data.frame(MEset_long %>%
# dplyr::arrange(desc(abs(!!as.name(TraitGS)))) %>%
# dplyr::distinct(Geneid, .keep_all = TRUE) %>%
# dplyr::slice(1:5))$Geneid
#
# Genes_Top_adjSums<- as.data.frame(MEset_long %>%
# dplyr::arrange(desc(adjSums)) %>%
# dplyr::distinct(Geneid, .keep_all = TRUE) %>%
# dplyr::slice(1:5))$Geneid
#
# ####################################################
# #Select top 10 Pathways and top 5 Genes within them#
# if ("Description" %in% colnames(MEset_long)) {
# Description_top10<- as.data.frame(MEset_long %>%
# dplyr::arrange(desc(p.adjust)) %>%
# dplyr::distinct(Description, .keep_all = TRUE) %>%
# dplyr::slice(1:5))$Description
#
# MEset_long_top10 <- MEset_long[MEset_long$Description %in% Description_top10,]
#
# Genes_Description_top <- as.data.frame(MEset_long_top10 %>%
# dplyr::group_by(Description) %>%
# dplyr::arrange(desc(adjSums)) %>% # Order by adjSums in descending order
# dplyr::distinct(Geneid, .keep_all = TRUE) %>% # Keep distinct GeneID values
# dplyr::slice(1:5))$Geneid
#
# MEset_long_top10_top50_Trait5 <- MEset_long_top10[MEset_long_top10$Geneid %in%
# c(Genes_Description_top),]
# MEset_long_top10_Trait5 <- MEset_long[MEset_long$Geneid %in%
# c(Genes_Top_adjSums,Genes_Top_Trait),]
#
# MEset_long_top10_top50_Trait5 <- rbind(MEset_long_top10_top50_Trait5, MEset_long_top10_Trait5)
#
# } else {
# print(paste("No Description in", Tissue, Type, MODULE))
# MEset_long_top10_top50_Trait5 <- MEset_long[MEset_long$Geneid %in%
# c(Genes_Top_adjSums,Genes_Top_Trait),]
# }
#
#
# MEset_long_top10_top50_Trait5<- MEset_long_top10_top50_Trait5 %>%
# dplyr::arrange(desc(abs(!!as.name(TraitGS))))
#
# write.csv2(MEset_long_top10_top50_Trait5, file = paste0(file.path(path_Output_WGCNA,
# paste(paste("GS_TraitFilter", Analysis, Tissue, Type, "Corr_filter",
# Corr_Filter,
# MODULE, Trait, sep="_"), "csv", sep="."))))
#
#
#
#
# if ("Description" %in% colnames(MEset_long_top10_top50_Trait5)) {
# HubHeatPlot <- MEset_long_top10_top50_Trait5 %>%
# mutate(Description = reorder(Description, p.adjust),
# GeneCombo = reorder(GeneCombo, desc(adjSums))) %>% #!!as.name(TraitGS)
# ggplot(., aes(x = GeneCombo, y = Description, fill = !!as.name(TraitGS), size = adjSums)) +
# geom_point(shape = 25)
# } else {
# MEset_long_top10_top50_Trait5$Description <- "No Enrichment"
# HubHeatPlot <- MEset_long_top10_top50_Trait5 %>%
# mutate(GeneCombo = reorder(GeneCombo, desc(adjSums))) %>% #!!as.name(TraitGS)
# ggplot(., aes(x = GeneCombo, y = Description, fill = !!as.name(TraitGS), size = adjSums)) +
# geom_point(shape = 25)
# }
#
# HubHeatPlot <- HubHeatPlot +
# scale_fill_gradient2(name = paste("GS", sub("GS._", "", TraitGS)), low = "#e06663",
# mid = "white", high = "#327eba") +
# scale_size_continuous(name = expression("Connectivity K"[IN]), range = c(3, 10)) +
# theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
# theme_minimal() + atheme +
# theme(
# panel.background = element_rect(fill='transparent'), #transparent panel bg
# plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
# panel.grid.major = element_line(colour = "grey40"),
# panel.grid.minor = element_line(colour = "grey40"),
# legend.title = element_text( size = 9),
# legend.text = element_text(size = 9,face = "bold")) +
# theme(axis.title.x.bottom = element_text(color="grey13"),
# strip.text = element_text(color = "black", face= "bold")) +
# labs(x = "GeneID [Human-Zander]", y = "KEGG Pathway Description") +
# ggtitle(paste("Heatplot for Top5 Genes in pathways, in connectivity and corrleation to", Trait, "for",
# paste(Tissue, Type, sub("ME", "", MODULE), sep="-")))
#
# ggsave(HubHeatPlot, filename = paste(paste(
# Species, Year, Season, Analysis, Tissue, Type, Trait, "HubHeatPlot_Filtered", sub("ME", "RNA", MODULE), sep="_"),".png", sep=""), path = pathPlots ,
# device='png', dpi=300, width = 10,height = 5)
# }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
#
# HUB_GS_List_Filtered_Plots[[HUB_GS_List_Filtered_Plots_length+1]] <- HubHeatPlot
# names(HUB_GS_List_Filtered_Plots)[[HUB_GS_List_Filtered_Plots_length+1]] <- paste("HubHeatPlot", Tissue, Type, MODULE, Trait, sep="_")
#
# HUB_GS_List_Filtered_DFs[[HUB_GS_List_Filtered_DFs_length+1]] <- MEset_long_top10_top50_Trait5
# names(HUB_GS_List_Filtered_DFs)[[HUB_GS_List_Filtered_DFs_length+1]] <- paste("HubHeatDF", Tissue, Type, MODULE, Trait, sep="_")
#
# }
# }
# }
tpms <- list("tpmGill "= tpm_RNA_Gill, "tpmLiver" = tpm_RNA_Liver)
tpm_value <- 1; print(paste("TPM value =", tpm_value))
## [1] "TPM value = 1"
Module_Gene_Heatmap_list <- list()
for (WGCNAset in names(WGCNA_list)[grepl("RNA", names(WGCNA_list))]) {
tryCatch({
require(tidyverse); require(WGCNA)
Module_Gene_Heatmap_list_length <- length(Module_Gene_Heatmap_list)
print(WGCNAset)
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
omics_data <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("omics_data", names(WGCNA_list[[WGCNAset]]))][[1]])
MEs <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("MEs", names(WGCNA_list[[WGCNAset]]))][[1]])
SAM <- as.data.frame(WGCNA_list[[WGCNAset]][grepl("datTraits", names(WGCNA_list[[WGCNAset]]))][[1]])
TPM <- tpms[[names(tpms)[grepl(Tissue, names(tpms))]]]
TPM <- TPM[names(TPM) %in% rownames(SAM)]
TPM$Geneid <- rownames(TPM)
TPM <-dplyr::left_join(TPM, SLUCGeneManual[c("Geneid", "Human_SYMBOL_Manual")])
TPM$Human_SYMBOL_Manual_unique <- make.unique(TPM$Human_SYMBOL_Manual, sep = "_")
#This is important because WGCNA replacec special charactersin in gene names by dot#
TPM$GeneidWGCNA <-gsub("[[:punct:]]+", ".", TPM$Geneid)
HUBs <- HUB_GS_List[[paste("HUB_GS", Tissue, Type, Analysis, sep="_")]]
Module_list <- list()
for (Hub in names(HUBs)) {
tryCatch({
Module_list_length <- length(Module_list)
Samples <- if (Tissue == "Gill" ) {
Samples <- Samples_RNA_Gill }else {
Samples <- Samples_RNA_Liver }
#Selection of Sample file
SAMDF <- if (Tissue == "Gill" ) {
SAMDF <- SAMDF_RNA_Gill } else {SAMDF <- SAMDF_RNA_Liver}
FILENAME2 <- paste(paste(Species, Year, Season, Analysis, Tissue, Type, "TPM-Module-Heatmap", sub("ME", "RNA", Hub), sep="_"), ".png", sep="")
TITLE <- draw_label_themeRKwhite(
paste(Species, Tissue, Type,Analysis, paste(Type,sub("ME", "", Hub)), sep="_"),
element = "plot.title", x = 0.05, hjust = 0, vjust = 1)
HUB <- HUBs[[Hub]]
Input <- TPM[TPM$GeneidWGCNA %in% HUB$Geneid, ]
#print(paste(Hub, "genes before TPM filter", tpm_value, ":", dim(Input)[1]))
Input <- Input[(rowSums(Input > tpm_value) >= 3),] #TPM Filter
#print(paste(Hub, "genes after TPM filter", tpm_value, ":", dim(Input)[1]))
rownames(Input) <- Input$Human_SYMBOL_Manual_unique
GeneHeatPlotRKnoClust_SL_tpm(Species = Species, Input = Input, Samples = Samples, SAMDF = SAMDF,
TITLE = TITLE, filename= FILENAME2)
Module_list[[Module_list_length+1]] <- hmap
names(Module_list)[[Module_list_length+1]] <- Hub
##################
#Plot a selection#
##################
if (Type == "RNA" & Tissue == "Gill" & Hub %in% c("ME2", "ME7", "ME11")) {
plot(GeneHeatPlotRKnoClust_SL_tpm_plot)
} else if (Tissue == "Liver" & Hub %in% c("ME1", "ME3", "ME6", "ME7")) {
plot(GeneHeatPlotRKnoClust_SL_tpm_plot)
} else if (Type == "SSU" & Hub %in% c("ME1", "ME2", "ME3")) {
plot(GeneHeatPlotRKnoClust_SL_tpm_plot)
} else {
# print("Plots are saved to the pathPlots")
}
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
Module_Gene_Heatmap_list[[Module_Gene_Heatmap_list_length+1]] <- Module_list
names(Module_Gene_Heatmap_list)[[Module_Gene_Heatmap_list_length+1]] <- paste("Module_Gene_Heatmap", Type, Tissue, sep="_")
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "RNA_Gill_WGCNA"
## [1] "RNA_Liver_WGCNA"
######################################################################
#Pathways are sliced to top 10 if more than 10 pathways in one Module#
######################################################################
HUB_GS_List <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "HUB_GeneSig", Date, sep="_"), ".rds", sep=""))))
ModsOfInterst_list <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, "ModsOfInterst_list", Date, sep="_"), ".rds", sep=""))))
Abiotics <- c("O2","Salinity","SecchiDepth")
###########################################################
#Modify ModsOfInterst to only Positive values for Salinity#
###########################################################
for (i in names (ModsOfInterst_list)) {
ModsOfInterst_list[[i]]$Salinity <-
ModsOfInterst_list[[i]]$Salinity[ModsOfInterst_list[[i]]$Salinity[1] > 0, , drop = FALSE]
}
HUB_GS_List_Filtered_DFs <- list()
for (WGCNAset in names(HUB_GS_List)) {
Tissue <- sub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", WGCNAset)
Type <- sub(paste0(".*", "(RNA|SSU)", ".*"), "\\1", WGCNAset)
Analysis <- sub(paste0(".*", "(WGCNA)", ".*"), "\\1", WGCNAset)
WGCNASET<- HUB_GS_List[[WGCNAset]]
ModsOfInterst<- ModsOfInterst_list[[paste("ModsOfInterest", Tissue, Type, sep="_")]]
###########################################
#Remove all ME0 and only focus on Abiotics#
ModsOfInterst <- ModsOfInterst[names(ModsOfInterst) %in% Abiotics]
remove_rows_ME0 <- function(df) {
df[!(rownames(df) == "ME0"), ]}
ModsOfInterst <- lapply(ModsOfInterst, remove_rows_ME0)
for (Trait in names(ModsOfInterst)) {
for (MODULE in names(WGCNASET[names(WGCNASET) %in% rownames(ModsOfInterst[[Trait]])])) {
print(paste(Trait, MODULE))
HUB_GS_List_Filtered_DFs_length <- length(HUB_GS_List_Filtered_DFs)
TraitGS <- paste("GS._", Trait, sep="")
MEset <- WGCNASET[[MODULE]]
MEsetGenes <- na.omit(MEset[abs(MEset[[TraitGS]]) > 0.3, ]$Geneid)
MEset <- MEset %>%
dplyr::filter(ModuleMembership > 0.8, Geneid %in% MEsetGenes)
if ("Description" %in% colnames(MEset)) {
MEset_long <- as.data.frame(MEset %>%
separate_rows(Description, p.adjust, category, subcategory, sep = "/"))
} else {
MEset_long <- as.data.frame(MEset)
}
###################################
#Select top 10 Pathways per Module#
####################################
if ("Description" %in% colnames(MEset_long)) {
Description_top10<- as.data.frame(MEset_long %>%
dplyr::arrange(desc(p.adjust)) %>%
dplyr::distinct(Description, .keep_all = TRUE) %>%
dplyr::slice(1:10))$Description
MEset_long_top10 <- MEset_long[MEset_long$Description %in% Description_top10,]
# Genes_Description_top <- as.data.frame(MEset_long_top10 %>%
# dplyr::group_by(Description) %>%
# dplyr::arrange(desc(adjSums)) %>% # Order by adjSums in descending order
# dplyr::distinct(Geneid, .keep_all = TRUE)
# #%>% dplyr::slice(1:5))$Geneid
# )
# MEset_long_top10_top50_Trait5 <- MEset_long_top10[MEset_long_top10$Geneid %in%
# c(Genes_Description_top),]
# MEset_long_top10_Trait5 <- MEset_long[MEset_long$Geneid %in%
# c(Genes_Top_adjSums,Genes_Top_Trait),]
#
# MEset_long_top10_top50_Trait5 <- rbind(MEset_long_top10_top50_Trait5, MEset_long_top10_Trait5)
} else {
print(paste("No Description in", Tissue, Type, MODULE, Trait))
MEset_long_top10 <- MEset_long
}
HUB_GS_List_Filtered_DFs[[HUB_GS_List_Filtered_DFs_length+1]] <- MEset_long_top10
names(HUB_GS_List_Filtered_DFs)[[HUB_GS_List_Filtered_DFs_length+1]] <- paste("HubHeatDF", Tissue, Type, MODULE, Trait, sep="_")
}
}
}
## [1] "O2 ME7"
## [1] "O2 ME2"
## [1] "O2 ME34"
## [1] "No Description in Gill RNA ME34 O2"
## [1] "O2 ME21"
## [1] "O2 ME11"
## [1] "O2 ME25"
## [1] "O2 ME38"
## [1] "No Description in Gill RNA ME38 O2"
## [1] "O2 ME10"
## [1] "No Description in Gill RNA ME10 O2"
## [1] "O2 ME28"
## [1] "No Description in Gill RNA ME28 O2"
## [1] "O2 ME4"
## [1] "Salinity ME5"
## [1] "Salinity ME6"
## [1] "No Description in Gill RNA ME6 Salinity"
## [1] "Salinity ME37"
## [1] "Salinity ME1"
## [1] "SecchiDepth ME3"
## [1] "SecchiDepth ME22"
## [1] "SecchiDepth ME24"
## [1] "No Description in Gill RNA ME24 SecchiDepth"
## [1] "SecchiDepth ME23"
## [1] "SecchiDepth ME13"
## [1] "SecchiDepth ME12"
## [1] "No Description in Gill RNA ME12 SecchiDepth"
## [1] "O2 ME2"
## [1] "O2 ME14"
## [1] "O2 ME8"
## [1] "O2 ME29"
## [1] "No Description in Liver RNA ME29 O2"
## [1] "Salinity ME11"
## [1] "No Description in Liver RNA ME11 Salinity"
## [1] "Salinity ME18"
## [1] "No Description in Liver RNA ME18 Salinity"
## [1] "Salinity ME25"
## [1] "No Description in Liver RNA ME25 Salinity"
## [1] "Salinity ME38"
## [1] "No Description in Liver RNA ME38 Salinity"
## [1] "Salinity ME10"
## [1] "Salinity ME36"
## [1] "No Description in Liver RNA ME36 Salinity"
## [1] "SecchiDepth ME9"
## [1] "SecchiDepth ME6"
## [1] "SecchiDepth ME1"
WGCNA_PatwaysTissueBarplot <- list()
for (Abiotic in Abiotics) {
WGCNA_PatwaysTissueBarplotLength <- length(WGCNA_PatwaysTissueBarplot)
################
#Liver and Gill#
################
Bound_Hubs_Abiotic_Gill <- plyr::rbind.fill(HUB_GS_List_Filtered_DFs[grepl(Abiotic,
names(HUB_GS_List_Filtered_DFs)) & grepl("Gill", names(HUB_GS_List_Filtered_DFs))])
Bound_Hubs_Abiotic_Gill$Module <- paste("Gill-RNA", Bound_Hubs_Abiotic_Gill$Module, sep="-")
Bound_Hubs_Abiotic_Gill$TissueType <- paste("Gill-RNA")
Bound_Hubs_Abiotic_Liver <- plyr::rbind.fill(HUB_GS_List_Filtered_DFs[grepl(Abiotic,
names(HUB_GS_List_Filtered_DFs)) & grepl("Liver", names(HUB_GS_List_Filtered_DFs))])
Bound_Hubs_Abiotic_Liver$Module <- paste("Liver-RNA", Bound_Hubs_Abiotic_Liver$Module, sep="-")
Bound_Hubs_Abiotic_Liver$TissueType <- paste("Liver-RNA")
Bound_Hubs_Abiotic <- plyr::rbind.fill(Bound_Hubs_Abiotic_Liver, Bound_Hubs_Abiotic_Gill)
Bound_Hubs_Abiotic <- Bound_Hubs_Abiotic[!is.na(Bound_Hubs_Abiotic$Description),]
#Bound_Hubs_Abiotic[Bound_Hubs_Abiotic$subcategory == "Global and overview maps",]$subcategory <- "Metabolism"
if ("Global and overview maps" %in% Bound_Hubs_Abiotic$subcategory) {
Bound_Hubs_Abiotic[Bound_Hubs_Abiotic$subcategory %in%
"Global and overview maps",]$subcategory <- "Metabolism"
}
if ("Thermogenesis" %in% Bound_Hubs_Abiotic$Description) {
Bound_Hubs_Abiotic <- Bound_Hubs_Abiotic[-which(Bound_Hubs_Abiotic$Description %in% c("Thermogenesis")), ]
}
#######################################
#Order the subcategories for categorys#
Bound_Hubs_Abiotic$subcategory <- factor(Bound_Hubs_Abiotic$subcategory, levels =
unique(Bound_Hubs_Abiotic$subcategory[order(Bound_Hubs_Abiotic$category)]))
#########
#Barplot#
#########
Abiotic_tissue<- Bound_Hubs_Abiotic %>%
group_by(Description, TissueType, subcategory) %>%
summarise(Count = n())
#Abiotic_tissue$DescriptionWrap <- sjmisc::word_wrap(Abiotic_tissue$Description, 40)
Abiotic_tissue_plot <- Abiotic_tissue %>%
ggplot(., aes(x=Count, y=Description, fill = TissueType))+
geom_bar(stat="identity") +
labs(title = paste("Module-Pathways correlated with", Abiotic),
x = "",
y = "") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_x_sqrt(breaks = c(10, 100, 200),
limits =c(0, 200)) +
#scale_x_continuous(limits = c(0, 100)) +
#scale_fill_brewer(palette = "Set1") +
facet_grid(subcategory ~., scales = "free", space = "free") +
#xlab("\n\nAhead AADT") +
#theme_minimal() +
atheme +
theme(strip.text.y.left = element_blank(),
strip.text.y.right = element_text(angle = 0, hjust = 0,
size=10, face="bold", color= "black"),
strip.text.x.bottom = element_blank(),
legend.text = element_text(size=10),
legend.title = element_blank())+
theme(panel.spacing = unit(0, "lines"),
panel.spacing.y=unit(0.2, "lines"),
legend.position = "none")
HEIGHt <- Bound_Hubs_Abiotic %>%
group_by(Description, TissueType, subcategory) %>%
summarise(Count = n())
HEIGHT <- length(unique(HEIGHt$subcategory))*0.4
ggsave(Abiotic_tissue_plot, filename = paste(paste(Species, Year, Season, Analysis, "PatwaysTissueBarplot", Abiotic, sep="_"), ".png", sep=""), path = pathPlots , device='png', dpi=300, width = 10, height = HEIGHT)
WGCNA_PatwaysTissueBarplot[[WGCNA_PatwaysTissueBarplotLength+1]] <- Abiotic_tissue_plot
names(WGCNA_PatwaysTissueBarplot)[[WGCNA_PatwaysTissueBarplotLength+1]] <- paste(Abiotic, "WGCNA_PatwaysTissueBarplot", sep="_")
}
##############
#Summary Plot#
##############
cowplot::plot_grid(WGCNA_PatwaysTissueBarplot$O2_WGCNA_PatwaysTissueBarplot, labels = c("A"), ncol = 1) -> part_1
cowplot::plot_grid(WGCNA_PatwaysTissueBarplot$SecchiDepth_WGCNA_PatwaysTissueBarplot, WGCNA_PatwaysTissueBarplot$Salinity_WGCNA_PatwaysTissueBarplot, labels = c("B", "C"), ncol = 1, rel_widths = c(1,1)) -> part_2
cowplot::plot_grid(part_1, part_2, labels = c("", ""), rel_widths = c(1,1), ncol = 2) -> part_3
ggsave(part_3, filename = paste(paste(Species, Year, Season, Analysis, "PatwaysTissueBarplot", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 18,height = 10)
part_3
Instead of querying existing resources look for correlations in your own dataset to find out which genes have similar expression. There are many tools that can analyze your data for correlation. A popular tool is Weighted Gene Correlation Network Analysis (WGCNA) which takes expression data and calculates functional modules. As a simple example we can transform our expression dataset into a correlation matrix.
Using the Cytoscape App, aMatReader, we transform our adjacency matrix into an interaction network. First we filter the correlation matrix to contain only the strongest connections (for example, only correlations greater than 0.9).
WGCNA_list <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(Species, Year, Season, Analysis, Date, sep="_"), ".rds", sep=""))))
#You need to download and Open Cytoscape on your computer for this to work
#http://www.bioconductor.org/packages/devel/bioc/vignettes/rWikiPathways/inst/doc/Pathway-Analysis.html
#https://yulab-smu.top/biomedical-knowledge-mining-book/clusterprofiler-go.html
# library(tidyverse)
# library(dplyr)
# library(plyr)
# library(clusterProfiler)
# library(enrichplot)
# library(ggplot2)
# library("org.Hs.eg.db")
# library("org.Dr.eg.db")
# library("org.Mm.eg.db")
# library(DOSE)
# library(rWikiPathways)
# library(jsonlite)
# library("DOSE")
# library("GO.db")
# library("GSEABase")
# library("dplyr")
# library("tidyr")
# library("stringr")
# library("RColorBrewer")
# library("rWikiPathways")
# library("RCy3")
###########
#Cytoscape#
###########
#list of cytoscape apps to install
#Open Cytoscape
# cytoscapePing()
# #list of app to install Do that once!
# installApp('WikiPathways')
# installApp('CyTargetLinker')
# installApp('stringApp')
# installApp('enrichmentMap')
# installApp('autoannotate')
# installApp('wordcloud')
# installApp('stringapp')
# installApp('aMatReader')
# installApp('clustermaker2')
# cytoscapeVersionInfo ()
# installApp('STRINGapp')
# installApp('aMatReader')
# installApp('clusterMaker2')
# The following setting is important, do not omit.
# library(WGCNA)
# options(stringsAsFactors = FALSE);
#
#
# datExpr <- WGCNA_list$RNA_Gill_WGCNA$RNA_Gill_omics_data
# moduleColors <- WGCNA_list$RNA_Gill_WGCNA$RNA_Gill_moduleColors
# GeneAnno <- WGCNA_list$RNA_Gill_WGCNA$RNA_Gill_GeneAnno
#
# TOM = TOMsimilarityFromExpr(datExpr, power = 6, nThreads =8);
#
#
# for (i in names(GeneAnno)){
# modules = as.numeric(sub("GeneAnno_ME", "", i))
#
# colors = labels2colors(modules)
# genes = names(datExpr)
# inModule = is.finite(match(moduleColors, colors));
# modGenes = genes[inModule];
# modGenesAnno <-SLUCGeneManual[SLUCGeneManual$rowname %in%
# modGenes,]
# modGenesdf <- data.frame(rowname = modGenes)
# modGenesAnnosubset <- modGenesAnno[modGenesAnno$rowname %in% modGenesdf$rowname,]
#
# result <- merge(modGenesAnnosubset, modGenesdf, all = TRUE)
# result <-result %>% mutate(TomAnno = coalesce(Human_SYMBOL_Manual, rowname))
#
# # Reorder result based on the order_vector
# order_vector <- order(match(result$rowname, modGenesdf$rowname))
# result_ordered <- result[order_vector, ]
# modGenesHuman_SYMBOL_Manual <- make.unique(result_ordered$TomAnno, sep = "_")
#
# # Select the corresponding Topological Overlap
# modTOM = TOM[inModule, inModule];
# dimnames(modTOM) = list(modGenesHuman_SYMBOL_Manual, modGenesHuman_SYMBOL_Manual)
#
# # Export the network into edge and node list files Cytoscape can read
# cyt = exportNetworkToCytoscape(modTOM,
# edgeFile = paste(file.path(path_Output_WGCNA,"CytoscapeInput-edges-"),
# paste(modules, collapse="-"), ".txt", sep=""),
# nodeFile = paste(file.path(path_Output_WGCNA, "CytoscapeInput-nodes-"),
# paste(modules, collapse="-"), ".txt", sep=""),
# weighted = TRUE,
# threshold = 0.5,
# nodeNames = modGenes,
# altNodeNames = modGenesHuman_SYMBOL_Manual,
# nodeAttr = moduleColors[inModule]);
# }
#
#
# for (i in names(GeneAnno)){
# modules = as.numeric(sub("GeneAnno_ME", "", i))
#
# colors = labels2colors(modules)
#
# edge <- read.delim(file.path(path_Output_WGCNA, paste("CytoscapeInput-edges-", modules, ".txt", sep="")))
# colnames(edge)
# colnames(edge) <- c("source", "target","weight","direction","fromAltName","toAltName")
#
# node <- read.delim(file.path(path_Output_WGCNA, paste("CytoscapeInput-nodes-", modules, ".txt", sep="")))
# colnames(node)
# colnames(node) <- c("id","altName","node_attributes")
#
# createNetworkFromDataFrames(node,edge, title=paste(modules, "network", sep=""), collection="DataFrame Example")
#
# ################ customise the network visualization ##################################
# # use other pre-set visual style
# setVisualStyle('Marquee')
#
# # set up my own style
# style.name = "myStyle"
# defaults <- list(NODE_SHAPE="diamond",
# NODE_SIZE=10,
# EDGE_TRANSPARENCY=120,
# NODE_LABEL_POSITION="W,E,c,0.00,0.00")
# nodeLabels <- mapVisualProperty('node label','id','p')
# nodeFills <- mapVisualProperty('node fill color','node_attributes','d',c("A","B"), c("#FF9900","#66AAAA"))
# arrowShapes <- mapVisualProperty('Edge Target Arrow
# Shape','interaction','d',c("activates","inhibits","interacts"),c("Arrow","T","None"))
# edgeWidth <- mapVisualProperty('edge width','weight','p')
#
# createVisualStyle(style.name, defaults, list(nodeLabels,nodeFills,arrowShapes,edgeWidth))
# setVisualStyle(style.name)
#
# }
#
#
#
#
# #Import Networks to Cytoscape
# cytoscapePing () # make sure cytoscape is open
# cytoscapeVersionInfo ()
#
# for (i in names(GeneAnno)){
# modules = as.numeric(sub("GeneAnno_ME", "", i))
#
# colors = labels2colors(modules)
#
# edge <- read.delim(file.path(path_Output_WGCNA, paste("CytoscapeInput-edges-", modules, ".txt", sep="")))
# colnames(edge)
# colnames(edge) <- c("source", "target","weight","direction","fromAltName","toAltName")
#
# node <- read.delim(file.path(path_Output_WGCNA, paste("CytoscapeInput-nodes-", modules, ".txt", sep="")))
# colnames(node)
# colnames(node) <- c("id","altName","node_attributes")
#
# createNetworkFromDataFrames(node,edge, title=paste(modules, "network", sep=""), collection="DataFrame Example")
#
# ################ customise the network visualization ##################################
# # use other pre-set visual style
# setVisualStyle('Marquee')
#
# # set up my own style
# style.name = "myStyle"
# defaults <- list(NODE_SHAPE="diamond",
# NODE_SIZE=10,
# EDGE_TRANSPARENCY=120,
# NODE_LABEL_POSITION="W,E,c,0.00,0.00")
# nodeLabels <- mapVisualProperty('node label','id','p')
# nodeFills <- mapVisualProperty('node fill color','node_attributes','d',c("A","B"), c("#FF9900","#66AAAA"))
# arrowShapes <- mapVisualProperty('Edge Target Arrow
# Shape','interaction','d',c("activates","inhibits","interacts"),c("Arrow","T","None"))
# edgeWidth <- mapVisualProperty('edge width','weight','p')
#
# createVisualStyle(style.name, defaults, list(nodeLabels,nodeFills,arrowShapes,edgeWidth))
# setVisualStyle(style.name)
# }
# ego<- ORA_list$Gill_RNA_WGCNA_MODULES_Corr_Filter_0.8$EGOS
#########################
#Cytoscape Visualization#
#########################
############
#Enrichtmap#
############
# Create an enrichment map with the returned g:Profiler results. An enrichment map is a different sort of network. Instead of nodes representing genes, nodes represent pathways or functions. Edges between these pathways or functions represent shared genes or pathway crosstalk. An enrichment map is a way to visualize your enrichment results to help reduce redundancy and uncover main themes. Pathways can also be explored in detail using the features available through the App in Cytoscape.
#https://bioconductor.github.io/BiocWorkshops/cytoscape-automation-in-r-using-rcy3.html
# A <- list()
# for (i in names(.GlobalEnv[[name]]$`egoCL-Liver`)) {
# B <- as.data.frame(.GlobalEnv[[name]]$`egoCL-Liver`[[i]])
# A[[i]] <- B}
#A <- ldply(A, data.frame) Collapses the List object to one dataframe, might be more suitable to do per Cluster
# for (i in names(ego)[grepl("EGO", names(ego))]) {
# tryCatch({
# g <- paste(i)
# for (ii in names(ego[[i]])) {
# tryCatch({
# B <- as.data.frame(ego[[i]][[ii]])
# gg <- paste(names(ego[[i]][ii]))
# ## extract a dataframe with results from object of type enrichResult
# egobp.results.df <- B
# ## create a new column for term size from BgRatio
# egobp.results.df$term.size <- gsub("/(\\d+)", "", B$BgRatio)
# ## filter for term size to keep only term.size => 3, gene count >= 5 and subset
# #egobp.results.df <-
# # egobp.results.df[which(egobp.results.df[,'term.size'] >= 3 & egobp.results.df[,'Count'] >= 5),]
#
# egobp.results.df <- egobp.results.df[c("ID", "Description", "pvalue", "qvalue", "geneID")]
# ## format gene list column
# egobp.results.df$geneID <- gsub("/", ",", egobp.results.df$geneID)
# ## add column for phenotype
# egobp.results.df <- cbind(egobp.results.df, phenotype=1)
# egobp.results.df <- egobp.results.df[, c(1, 2, 3, 4, 6, 5)]
# ## change column headers
# colnames(egobp.results.df) <- c("Name","Description", "pvalue","qvalue", "phenotype","genes")
# egobp.results.filename <-
# paste0(file.path(path_Output_WGCNA, "Cytoscape-ORA-07.06.2022-"), save_name, "-", gg, ".txt")
# write.table(egobp.results.df,egobp.results.filename,col.name=TRUE,sep="\t",row.names=FALSE,quote=FALSE)
# CPres <-read.delim(file =
# paste0(file.path(path_Output_WGCNA, "Cytoscape-ORA-07.06.2022-"), save_name, "-", gg, ".txt"))
# print(head(CPres))
# #https://bioconductor.github.io/BiocWorkshops/cytoscape-automation-in-r-using-rcy3.html
# em_command = paste('enrichmentmap build analysisType="generic" ',
# 'pvalue=',"0.05", 'qvalue=',"0.05",
# 'similaritycutoff=',"0.25",
# 'coeffecients=',"JACCARD",
# 'enrichmentsDataset1=',egobp.results.filename ,sep=" ")
# em_network_suid <- commandsRun(em_command)
# renameNetwork(gg, network=as.numeric(em_network_suid))
# #Save Picture#
# cluster_png_file_name <-
# file.path(paste(path_Output_WGCNA, "Cytoscape-ORA-07.06.2022-", save_name, "-", gg, ".png",sep=""))
# if(file.exists(cluster_png_file_name)){
# #cytoscape hangs waiting for user response if file already exists. Remove it first
# file.remove(cluster_png_file_name)}
# #export the network
# exportImage(cluster_png_file_name, type = "png")
# #Annotate the Enrichment map to get the general themes that are found in the enrichment results of cluster
# #get the column from the nodetable and node table
# nodetable_colnames <- getTableColumnNames(table="node", network = as.numeric(em_network_suid))
# descr_attrib <- nodetable_colnames[grep(nodetable_colnames, pattern = "GS_DESCR")]
# #Autoannotate the network
# autoannotate_url <- paste("autoannotate annotate-clusterBoosted labelColumn=",
# descr_attrib," maxWords=3 ", sep="")
# current_name <-commandsGET(autoannotate_url)
# }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
# }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
# }
Exclucded for knitting, works fine
#You need to download and Open Cytoscape on your computer for this to work
#http://www.bioconductor.org/packages/devel/bioc/vignettes/rWikiPathways/inst/doc/Pathway-Analysis.html
#https://yulab-smu.top/biomedical-knowledge-mining-book/clusterprofiler-go.html
# library(tidyverse)
# library(dplyr)
# library(plyr)
# library(clusterProfiler)
# library(enrichplot)
# library(ggplot2)
# library("org.Hs.eg.db")
# library("org.Dr.eg.db")
# library("org.Mm.eg.db")
# library(DOSE)
# library(rWikiPathways)
# library(jsonlite)
# library("DOSE")
# library("GO.db")
# library("GSEABase")
# library("dplyr")
# library("tidyr")
# library("stringr")
# library("RColorBrewer")
# library("rWikiPathways")
# library("RCy3")
#
# ###########
# #Cytoscape#
# ###########
# #list of cytoscape apps to install
# #Open Cytoscape
# cytoscapePing()
# # #list of app to install Do that once!
# # installApp('WikiPathways')
# # installApp('CyTargetLinker')
# # installApp('stringApp')
# # installApp('enrichmentMap')
# # installApp('autoannotate')
# # installApp('wordcloud')
# # installApp('stringapp')
# # installApp('aMatReader')
# # installApp('clustermaker2')
# # cytoscapeVersionInfo ()
# # installApp('STRINGapp')
# # installApp('aMatReader')
# # installApp('clusterMaker2')
# # The following setting is important, do not omit.
# library(WGCNA)
# options(stringsAsFactors = FALSE);
#
#
#
# datExpr <- WGCNA_list$SSU_Gill_WGCNA$SSU_Gill_omics_data
# moduleColors <- WGCNA_list$SSU_Gill_WGCNA$SSU_Gill_moduleColors
# GeneAnno <- WGCNA_list$SSU_Gill_WGCNA$SSU_Tax
# TaxAnno <- WGCNA_list$SSU_Gill_WGCNA$SSU_Tax$All
# TOM = TOMsimilarityFromExpr(datExpr, power = 6, nThreads =8);
#
#
# for (i in names(GeneAnno[grepl("SSU", names(GeneAnno))])){
# modules = as.numeric(sub("SSU", "", i))
#
# colors = labels2colors(modules)
# genes = names(datExpr)
# inModule = is.finite(match(moduleColors, colors));
# modGenes = genes[inModule];
#
# modGenesAnno <-TaxAnno[TaxAnno$ASV %in%modGenes,]
# modGenesAnno$rowname <- modGenesAnno$ASV
# modGenesdf <- data.frame(rowname = modGenes)
# modGenesAnnosubset <- modGenesAnno[modGenesAnno$rowname %in% modGenesdf$rowname,]
#
# result <- merge(modGenesAnnosubset, modGenesdf, all = TRUE)
# result <-result %>% mutate(TomAnno = rowname)
#
# # Reorder result based on the order_vector
# order_vector <- order(match(result$rowname, modGenesdf$rowname))
# result_ordered <- result[order_vector, ]
# modGenesHuman_SYMBOL_Manual <- make.unique(result_ordered$TomAnno, sep = "_")
#
# # Select the corresponding Topological Overlap
# modTOM = TOM[inModule, inModule];
# dimnames(modTOM) = list(modGenesHuman_SYMBOL_Manual, modGenesHuman_SYMBOL_Manual)
#
# # Export the network into edge and node list files Cytoscape can read
# #https://support.bioconductor.org/p/95965/
# # My operational answer is to select a threshold that keeps the file size manageable, then use filtering in Cytoscape to interactively choose a threshold that results in an informative plot. This assumes that Cytoscape is only used for visualization; if you're going to use Cytoscape for further analysis, you should probably set the threshold to zero.
# #https://www.biostars.org/p/9514423/
#
# cyt = exportNetworkToCytoscape(modTOM,
# edgeFile = paste(file.path(path_Output_WGCNA,"CytoscapeInput-ASV-edges-"),
# paste(modules, collapse="-"), ".txt", sep=""),
# nodeFile = paste(file.path(path_Output_WGCNA, "CytoscapeInput-ASV-nodes-"),
# paste(modules, collapse="-"), ".txt", sep=""),
# weighted = TRUE,
# threshold = 0.1,
# nodeNames = modGenes,
# altNodeNames = modGenesHuman_SYMBOL_Manual,
# nodeAttr = moduleColors[inModule]);
# }
#
# #Import Networks to Cytoscape
# cytoscapePing () # make sure cytoscape is open
# cytoscapeVersionInfo ()
# for (i in names(GeneAnno[grepl("SSU", names(GeneAnno))])){
# tryCatch({
#
# modules = as.numeric(sub("SSU", "", i))
#
# colors = labels2colors(modules)
#
# edge <- read.delim(file.path(path_Output_WGCNA, paste("CytoscapeInput-ASV-edges-", modules, ".txt", sep="")))
# colnames(edge)
# colnames(edge) <- c("source", "target","weight","direction","fromAltName","toAltName")
#
# node <- read.delim(file.path(path_Output_WGCNA, paste("CytoscapeInput-ASV-nodes-", modules, ".txt", sep="")))
# colnames(node)
# colnames(node) <- c("id","altName","node_attributes")
#
# createNetworkFromDataFrames(node,edge, title=paste(modules, "network", sep=""), collection="DataFrame Example")
#
# ################ customise the network visualization ##################################
# # use other pre-set visual style
# setVisualStyle('Marquee')
# # set up my own style
# style.name = "myStyle"
# defaults <- list(NODE_SHAPE="diamond",
# NODE_SIZE=10,
# EDGE_TRANSPARENCY=120,
# NODE_LABEL_POSITION="W,E,c,0.00,0.00")
# nodeLabels <- mapVisualProperty('node label','id','p')
# nodeFills <- mapVisualProperty('node fill color','node_attributes','d',c("A","B"),
# c("#FF9900","#66AAAA"))
# arrowShapes <- mapVisualProperty('Edge Target Arrow
# Shape','interaction','d',c("activates","inhibits","interacts"),c("Arrow","T","None"))
# edgeWidth <- mapVisualProperty('edge width','weight','p')
#
# createVisualStyle(style.name, defaults, list(nodeLabels,nodeFills,arrowShapes,edgeWidth))
# setVisualStyle(style.name)
# }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
# For Visualization in Figure 3B change to cytoscape and do:
# Cytoscape -> Import -> Table -> SSU_Data with Column relMeanASV ->
#Styles -> LabelSize and LableText continous
#https://www.biostars.org/p/192885/#192910
#https://www.biostars.org/p/454313/
#https://manual.cytoscape.org/en/stable/Styles.html#tutorial-3-creating-a-new-style-with-a-continuous-mapping
#https://manual.cytoscape.org/en/stable/Styles.html
#https://www.biostars.org/p/454866/